{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 3188, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006273525721455458, "grad_norm": 4.55400276184082, "learning_rate": 0.0, "loss": 1.5808, "memory/device_mem_reserved(gib)": 63.4, "memory/max_mem_active(gib)": 59.02, "memory/max_mem_allocated(gib)": 57.84, "step": 1 }, { "epoch": 0.0012547051442910915, "grad_norm": 7.68040132522583, "learning_rate": 1.5723270440251573e-08, "loss": 1.474, "memory/device_mem_reserved(gib)": 66.65, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2 }, { "epoch": 0.0018820577164366374, "grad_norm": 10.7412691116333, "learning_rate": 3.1446540880503146e-08, "loss": 1.5559, "memory/device_mem_reserved(gib)": 66.65, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3 }, { "epoch": 0.002509410288582183, "grad_norm": 11.039576530456543, "learning_rate": 4.716981132075472e-08, "loss": 1.4446, "memory/device_mem_reserved(gib)": 66.65, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 4 }, { "epoch": 0.003136762860727729, "grad_norm": 6.22249174118042, "learning_rate": 6.289308176100629e-08, "loss": 1.515, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 5 }, { "epoch": 0.0037641154328732747, "grad_norm": 9.491832733154297, "learning_rate": 7.861635220125787e-08, "loss": 1.6619, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 6 }, { "epoch": 0.00439146800501882, "grad_norm": 5.316587924957275, "learning_rate": 9.433962264150944e-08, "loss": 1.4984, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 7 }, { "epoch": 0.005018820577164366, "grad_norm": 10.27668285369873, "learning_rate": 1.1006289308176101e-07, "loss": 1.459, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 8 }, { "epoch": 0.0056461731493099125, "grad_norm": 10.63105297088623, "learning_rate": 1.2578616352201258e-07, "loss": 1.5212, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 9 }, { "epoch": 0.006273525721455458, "grad_norm": 10.851506233215332, "learning_rate": 1.4150943396226417e-07, "loss": 1.5022, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 10 }, { "epoch": 0.006900878293601004, "grad_norm": 11.121560096740723, "learning_rate": 1.5723270440251575e-07, "loss": 1.4822, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 11 }, { "epoch": 0.0075282308657465494, "grad_norm": 11.112455368041992, "learning_rate": 1.729559748427673e-07, "loss": 1.4818, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 12 }, { "epoch": 0.008155583437892095, "grad_norm": 10.980988502502441, "learning_rate": 1.886792452830189e-07, "loss": 1.5752, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 13 }, { "epoch": 0.00878293601003764, "grad_norm": 11.245274543762207, "learning_rate": 2.0440251572327044e-07, "loss": 1.6465, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 14 }, { "epoch": 0.009410288582183186, "grad_norm": 8.604133605957031, "learning_rate": 2.2012578616352203e-07, "loss": 1.6738, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 15 }, { "epoch": 0.010037641154328732, "grad_norm": 10.810846328735352, "learning_rate": 2.358490566037736e-07, "loss": 1.6457, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 16 }, { "epoch": 0.01066499372647428, "grad_norm": 10.287492752075195, "learning_rate": 2.5157232704402517e-07, "loss": 1.5768, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 17 }, { "epoch": 0.011292346298619825, "grad_norm": 6.959763050079346, "learning_rate": 2.672955974842768e-07, "loss": 1.5683, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 18 }, { "epoch": 0.01191969887076537, "grad_norm": 12.073090553283691, "learning_rate": 2.8301886792452833e-07, "loss": 1.5883, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 19 }, { "epoch": 0.012547051442910916, "grad_norm": 12.099584579467773, "learning_rate": 2.987421383647799e-07, "loss": 1.5578, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 20 }, { "epoch": 0.013174404015056462, "grad_norm": 9.198031425476074, "learning_rate": 3.144654088050315e-07, "loss": 1.5087, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 21 }, { "epoch": 0.013801756587202008, "grad_norm": 10.079482078552246, "learning_rate": 3.3018867924528305e-07, "loss": 1.5111, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 22 }, { "epoch": 0.014429109159347553, "grad_norm": 12.30569076538086, "learning_rate": 3.459119496855346e-07, "loss": 1.6295, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 23 }, { "epoch": 0.015056461731493099, "grad_norm": 12.143986701965332, "learning_rate": 3.6163522012578617e-07, "loss": 1.6378, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 24 }, { "epoch": 0.015683814303638646, "grad_norm": 10.82630729675293, "learning_rate": 3.773584905660378e-07, "loss": 1.6042, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 25 }, { "epoch": 0.01631116687578419, "grad_norm": 10.716633796691895, "learning_rate": 3.9308176100628933e-07, "loss": 1.6034, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 26 }, { "epoch": 0.016938519447929738, "grad_norm": 11.65971565246582, "learning_rate": 4.088050314465409e-07, "loss": 1.5007, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 27 }, { "epoch": 0.01756587202007528, "grad_norm": 8.470008850097656, "learning_rate": 4.2452830188679244e-07, "loss": 1.671, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 28 }, { "epoch": 0.01819322459222083, "grad_norm": 8.622583389282227, "learning_rate": 4.4025157232704405e-07, "loss": 1.4135, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 29 }, { "epoch": 0.018820577164366373, "grad_norm": 8.237770080566406, "learning_rate": 4.559748427672956e-07, "loss": 1.5056, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 30 }, { "epoch": 0.01944792973651192, "grad_norm": 10.933939933776855, "learning_rate": 4.716981132075472e-07, "loss": 1.5356, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 31 }, { "epoch": 0.020075282308657464, "grad_norm": 11.000118255615234, "learning_rate": 4.874213836477988e-07, "loss": 1.5723, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 32 }, { "epoch": 0.02070263488080301, "grad_norm": 9.162322044372559, "learning_rate": 5.031446540880503e-07, "loss": 1.5293, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 33 }, { "epoch": 0.02132998745294856, "grad_norm": 9.679073333740234, "learning_rate": 5.18867924528302e-07, "loss": 1.6125, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 34 }, { "epoch": 0.021957340025094103, "grad_norm": 8.7017183303833, "learning_rate": 5.345911949685535e-07, "loss": 1.6079, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 35 }, { "epoch": 0.02258469259723965, "grad_norm": 7.953933238983154, "learning_rate": 5.503144654088051e-07, "loss": 1.4798, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 36 }, { "epoch": 0.023212045169385194, "grad_norm": 8.445432662963867, "learning_rate": 5.660377358490567e-07, "loss": 1.4155, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 37 }, { "epoch": 0.02383939774153074, "grad_norm": 9.527459144592285, "learning_rate": 5.817610062893082e-07, "loss": 1.556, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 38 }, { "epoch": 0.024466750313676285, "grad_norm": 9.838929176330566, "learning_rate": 5.974842767295598e-07, "loss": 1.6388, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 39 }, { "epoch": 0.025094102885821833, "grad_norm": 7.818563461303711, "learning_rate": 6.132075471698113e-07, "loss": 1.6555, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 40 }, { "epoch": 0.025721455457967377, "grad_norm": 6.137913227081299, "learning_rate": 6.28930817610063e-07, "loss": 1.4206, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 41 }, { "epoch": 0.026348808030112924, "grad_norm": 9.544657707214355, "learning_rate": 6.446540880503145e-07, "loss": 1.5458, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 42 }, { "epoch": 0.026976160602258468, "grad_norm": 9.101583480834961, "learning_rate": 6.603773584905661e-07, "loss": 1.6298, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 43 }, { "epoch": 0.027603513174404015, "grad_norm": 6.088347434997559, "learning_rate": 6.761006289308177e-07, "loss": 1.5088, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 44 }, { "epoch": 0.028230865746549563, "grad_norm": 8.540587425231934, "learning_rate": 6.918238993710692e-07, "loss": 1.4617, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 45 }, { "epoch": 0.028858218318695106, "grad_norm": 8.382627487182617, "learning_rate": 7.075471698113208e-07, "loss": 1.5994, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 46 }, { "epoch": 0.029485570890840654, "grad_norm": 8.526695251464844, "learning_rate": 7.232704402515723e-07, "loss": 1.4959, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 47 }, { "epoch": 0.030112923462986198, "grad_norm": 4.549665451049805, "learning_rate": 7.389937106918239e-07, "loss": 1.3477, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 48 }, { "epoch": 0.030740276035131745, "grad_norm": 6.675533771514893, "learning_rate": 7.547169811320755e-07, "loss": 1.3748, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 49 }, { "epoch": 0.03136762860727729, "grad_norm": 6.710190773010254, "learning_rate": 7.704402515723271e-07, "loss": 1.4118, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 50 }, { "epoch": 0.031994981179422836, "grad_norm": 5.698192119598389, "learning_rate": 7.861635220125787e-07, "loss": 1.5622, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 51 }, { "epoch": 0.03262233375156838, "grad_norm": 5.854632377624512, "learning_rate": 8.018867924528302e-07, "loss": 1.4148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 52 }, { "epoch": 0.033249686323713924, "grad_norm": 5.814241409301758, "learning_rate": 8.176100628930818e-07, "loss": 1.4957, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 53 }, { "epoch": 0.033877038895859475, "grad_norm": 5.345610618591309, "learning_rate": 8.333333333333333e-07, "loss": 1.5477, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 54 }, { "epoch": 0.03450439146800502, "grad_norm": 4.159165859222412, "learning_rate": 8.490566037735849e-07, "loss": 1.6153, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 55 }, { "epoch": 0.03513174404015056, "grad_norm": 4.605583667755127, "learning_rate": 8.647798742138364e-07, "loss": 1.5115, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 56 }, { "epoch": 0.035759096612296114, "grad_norm": 4.393484115600586, "learning_rate": 8.805031446540881e-07, "loss": 1.4937, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 57 }, { "epoch": 0.03638644918444166, "grad_norm": 4.844731330871582, "learning_rate": 8.962264150943397e-07, "loss": 1.4964, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 58 }, { "epoch": 0.0370138017565872, "grad_norm": 5.815843105316162, "learning_rate": 9.119496855345912e-07, "loss": 1.3776, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 59 }, { "epoch": 0.037641154328732745, "grad_norm": 4.103832721710205, "learning_rate": 9.276729559748428e-07, "loss": 1.5472, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 60 }, { "epoch": 0.038268506900878296, "grad_norm": 4.0624003410339355, "learning_rate": 9.433962264150944e-07, "loss": 1.4647, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 61 }, { "epoch": 0.03889585947302384, "grad_norm": 3.943577527999878, "learning_rate": 9.59119496855346e-07, "loss": 1.4935, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 62 }, { "epoch": 0.039523212045169384, "grad_norm": 3.511507034301758, "learning_rate": 9.748427672955975e-07, "loss": 1.481, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 63 }, { "epoch": 0.04015056461731493, "grad_norm": 3.368314027786255, "learning_rate": 9.90566037735849e-07, "loss": 1.436, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 64 }, { "epoch": 0.04077791718946048, "grad_norm": 6.15948486328125, "learning_rate": 1.0062893081761007e-06, "loss": 1.4033, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 65 }, { "epoch": 0.04140526976160602, "grad_norm": 3.0553529262542725, "learning_rate": 1.0220125786163524e-06, "loss": 1.4416, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 66 }, { "epoch": 0.04203262233375157, "grad_norm": 3.3963119983673096, "learning_rate": 1.037735849056604e-06, "loss": 1.3565, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 67 }, { "epoch": 0.04265997490589712, "grad_norm": 3.019916534423828, "learning_rate": 1.0534591194968555e-06, "loss": 1.5527, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 68 }, { "epoch": 0.04328732747804266, "grad_norm": 2.330040693283081, "learning_rate": 1.069182389937107e-06, "loss": 1.3976, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 69 }, { "epoch": 0.043914680050188205, "grad_norm": 3.104090929031372, "learning_rate": 1.0849056603773587e-06, "loss": 1.3887, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 70 }, { "epoch": 0.04454203262233375, "grad_norm": 2.3439829349517822, "learning_rate": 1.1006289308176102e-06, "loss": 1.3351, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 71 }, { "epoch": 0.0451693851944793, "grad_norm": 2.272207021713257, "learning_rate": 1.1163522012578618e-06, "loss": 1.4638, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 72 }, { "epoch": 0.045796737766624844, "grad_norm": 3.1712934970855713, "learning_rate": 1.1320754716981133e-06, "loss": 1.4946, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 73 }, { "epoch": 0.04642409033877039, "grad_norm": 2.8735315799713135, "learning_rate": 1.1477987421383649e-06, "loss": 1.3761, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 74 }, { "epoch": 0.04705144291091593, "grad_norm": 2.4971110820770264, "learning_rate": 1.1635220125786164e-06, "loss": 1.3517, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 75 }, { "epoch": 0.04767879548306148, "grad_norm": 3.0293681621551514, "learning_rate": 1.179245283018868e-06, "loss": 1.3868, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 76 }, { "epoch": 0.04830614805520703, "grad_norm": 2.4946517944335938, "learning_rate": 1.1949685534591195e-06, "loss": 1.432, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 77 }, { "epoch": 0.04893350062735257, "grad_norm": 2.731900691986084, "learning_rate": 1.210691823899371e-06, "loss": 1.4344, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 78 }, { "epoch": 0.04956085319949812, "grad_norm": 2.6176462173461914, "learning_rate": 1.2264150943396227e-06, "loss": 1.5104, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 79 }, { "epoch": 0.050188205771643665, "grad_norm": 2.2624993324279785, "learning_rate": 1.2421383647798742e-06, "loss": 1.3613, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 80 }, { "epoch": 0.05081555834378921, "grad_norm": 2.183051347732544, "learning_rate": 1.257861635220126e-06, "loss": 1.466, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 81 }, { "epoch": 0.05144291091593475, "grad_norm": 2.4126477241516113, "learning_rate": 1.2735849056603775e-06, "loss": 1.4675, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 82 }, { "epoch": 0.052070263488080304, "grad_norm": 2.0408334732055664, "learning_rate": 1.289308176100629e-06, "loss": 1.5206, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 83 }, { "epoch": 0.05269761606022585, "grad_norm": 2.421983242034912, "learning_rate": 1.3050314465408807e-06, "loss": 1.3339, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 84 }, { "epoch": 0.05332496863237139, "grad_norm": 2.067976236343384, "learning_rate": 1.3207547169811322e-06, "loss": 1.2658, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 85 }, { "epoch": 0.053952321204516936, "grad_norm": 2.595181465148926, "learning_rate": 1.3364779874213838e-06, "loss": 1.4028, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 86 }, { "epoch": 0.054579673776662486, "grad_norm": 2.261770486831665, "learning_rate": 1.3522012578616353e-06, "loss": 1.4594, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 87 }, { "epoch": 0.05520702634880803, "grad_norm": 1.8053139448165894, "learning_rate": 1.3679245283018869e-06, "loss": 1.4086, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 88 }, { "epoch": 0.055834378920953574, "grad_norm": 2.1403069496154785, "learning_rate": 1.3836477987421384e-06, "loss": 1.4465, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 89 }, { "epoch": 0.056461731493099125, "grad_norm": 1.8229817152023315, "learning_rate": 1.39937106918239e-06, "loss": 1.2884, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 90 }, { "epoch": 0.05708908406524467, "grad_norm": 2.258371591567993, "learning_rate": 1.4150943396226415e-06, "loss": 1.5038, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 91 }, { "epoch": 0.05771643663739021, "grad_norm": 2.1894209384918213, "learning_rate": 1.430817610062893e-06, "loss": 1.4794, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 92 }, { "epoch": 0.05834378920953576, "grad_norm": 2.3533987998962402, "learning_rate": 1.4465408805031447e-06, "loss": 1.44, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 93 }, { "epoch": 0.05897114178168131, "grad_norm": 2.299332618713379, "learning_rate": 1.4622641509433962e-06, "loss": 1.3913, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 94 }, { "epoch": 0.05959849435382685, "grad_norm": 4.128480911254883, "learning_rate": 1.4779874213836478e-06, "loss": 1.3982, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 95 }, { "epoch": 0.060225846925972396, "grad_norm": 2.248939037322998, "learning_rate": 1.4937106918238995e-06, "loss": 1.483, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 96 }, { "epoch": 0.06085319949811794, "grad_norm": 1.1961462497711182, "learning_rate": 1.509433962264151e-06, "loss": 1.2478, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 97 }, { "epoch": 0.06148055207026349, "grad_norm": 2.1051137447357178, "learning_rate": 1.5251572327044027e-06, "loss": 1.3615, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 98 }, { "epoch": 0.062107904642409034, "grad_norm": 2.1795237064361572, "learning_rate": 1.5408805031446542e-06, "loss": 1.4017, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 99 }, { "epoch": 0.06273525721455459, "grad_norm": 1.4975708723068237, "learning_rate": 1.5566037735849058e-06, "loss": 1.2858, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 100 }, { "epoch": 0.06336260978670012, "grad_norm": 2.0895345211029053, "learning_rate": 1.5723270440251573e-06, "loss": 1.3706, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 101 }, { "epoch": 0.06398996235884567, "grad_norm": 2.0322093963623047, "learning_rate": 1.5880503144654089e-06, "loss": 1.3236, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 102 }, { "epoch": 0.06461731493099122, "grad_norm": 2.1316373348236084, "learning_rate": 1.6037735849056604e-06, "loss": 1.4114, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 103 }, { "epoch": 0.06524466750313676, "grad_norm": 1.7316523790359497, "learning_rate": 1.619496855345912e-06, "loss": 1.2191, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 104 }, { "epoch": 0.06587202007528231, "grad_norm": 2.152735710144043, "learning_rate": 1.6352201257861635e-06, "loss": 1.3812, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 105 }, { "epoch": 0.06649937264742785, "grad_norm": 1.5850223302841187, "learning_rate": 1.650943396226415e-06, "loss": 1.1812, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 106 }, { "epoch": 0.0671267252195734, "grad_norm": 1.7916295528411865, "learning_rate": 1.6666666666666667e-06, "loss": 1.4622, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 107 }, { "epoch": 0.06775407779171895, "grad_norm": 2.106351375579834, "learning_rate": 1.6823899371069182e-06, "loss": 1.4152, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 108 }, { "epoch": 0.06838143036386449, "grad_norm": 2.268888235092163, "learning_rate": 1.6981132075471698e-06, "loss": 1.2981, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 109 }, { "epoch": 0.06900878293601004, "grad_norm": 1.7028484344482422, "learning_rate": 1.7138364779874213e-06, "loss": 1.5788, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 110 }, { "epoch": 0.06963613550815559, "grad_norm": 1.8516031503677368, "learning_rate": 1.7295597484276729e-06, "loss": 1.4352, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 111 }, { "epoch": 0.07026348808030113, "grad_norm": 2.121943235397339, "learning_rate": 1.7452830188679247e-06, "loss": 1.3917, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 112 }, { "epoch": 0.07089084065244668, "grad_norm": 2.1828010082244873, "learning_rate": 1.7610062893081762e-06, "loss": 1.322, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 113 }, { "epoch": 0.07151819322459223, "grad_norm": 2.0953001976013184, "learning_rate": 1.7767295597484278e-06, "loss": 1.3502, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 114 }, { "epoch": 0.07214554579673776, "grad_norm": 1.852333426475525, "learning_rate": 1.7924528301886793e-06, "loss": 1.2831, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 115 }, { "epoch": 0.07277289836888332, "grad_norm": 5.071216106414795, "learning_rate": 1.8081761006289309e-06, "loss": 1.4053, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 116 }, { "epoch": 0.07340025094102885, "grad_norm": 1.746447205543518, "learning_rate": 1.8238993710691824e-06, "loss": 1.3858, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 117 }, { "epoch": 0.0740276035131744, "grad_norm": 1.3724796772003174, "learning_rate": 1.839622641509434e-06, "loss": 1.4171, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 118 }, { "epoch": 0.07465495608531995, "grad_norm": 1.8758927583694458, "learning_rate": 1.8553459119496855e-06, "loss": 1.4434, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 119 }, { "epoch": 0.07528230865746549, "grad_norm": 1.7063547372817993, "learning_rate": 1.871069182389937e-06, "loss": 1.3846, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 120 }, { "epoch": 0.07590966122961104, "grad_norm": 1.6393623352050781, "learning_rate": 1.8867924528301889e-06, "loss": 1.4012, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 121 }, { "epoch": 0.07653701380175659, "grad_norm": 1.9628385305404663, "learning_rate": 1.9025157232704406e-06, "loss": 1.4207, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 122 }, { "epoch": 0.07716436637390213, "grad_norm": 1.8672785758972168, "learning_rate": 1.918238993710692e-06, "loss": 1.3259, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 123 }, { "epoch": 0.07779171894604768, "grad_norm": 1.5363185405731201, "learning_rate": 1.9339622641509438e-06, "loss": 1.3228, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 124 }, { "epoch": 0.07841907151819323, "grad_norm": 2.0102760791778564, "learning_rate": 1.949685534591195e-06, "loss": 1.4722, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 125 }, { "epoch": 0.07904642409033877, "grad_norm": 2.0831358432769775, "learning_rate": 1.965408805031447e-06, "loss": 1.3728, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 126 }, { "epoch": 0.07967377666248432, "grad_norm": 1.8129521608352661, "learning_rate": 1.981132075471698e-06, "loss": 1.4643, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 127 }, { "epoch": 0.08030112923462986, "grad_norm": 4.2833380699157715, "learning_rate": 1.99685534591195e-06, "loss": 1.266, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 128 }, { "epoch": 0.0809284818067754, "grad_norm": 2.023083448410034, "learning_rate": 2.0125786163522013e-06, "loss": 1.3666, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 129 }, { "epoch": 0.08155583437892096, "grad_norm": 2.003934144973755, "learning_rate": 2.028301886792453e-06, "loss": 1.3563, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 130 }, { "epoch": 0.0821831869510665, "grad_norm": 1.747253179550171, "learning_rate": 2.044025157232705e-06, "loss": 1.3771, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 131 }, { "epoch": 0.08281053952321205, "grad_norm": 1.769203782081604, "learning_rate": 2.059748427672956e-06, "loss": 1.3521, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 132 }, { "epoch": 0.0834378920953576, "grad_norm": 2.124824285507202, "learning_rate": 2.075471698113208e-06, "loss": 1.5221, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 133 }, { "epoch": 0.08406524466750313, "grad_norm": 2.0400173664093018, "learning_rate": 2.0911949685534593e-06, "loss": 1.3352, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 134 }, { "epoch": 0.08469259723964868, "grad_norm": 1.806525468826294, "learning_rate": 2.106918238993711e-06, "loss": 1.2829, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 135 }, { "epoch": 0.08531994981179424, "grad_norm": 1.6417590379714966, "learning_rate": 2.1226415094339624e-06, "loss": 1.4122, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 136 }, { "epoch": 0.08594730238393977, "grad_norm": 1.7979159355163574, "learning_rate": 2.138364779874214e-06, "loss": 1.3852, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 137 }, { "epoch": 0.08657465495608532, "grad_norm": 1.5453232526779175, "learning_rate": 2.1540880503144655e-06, "loss": 1.2895, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 138 }, { "epoch": 0.08720200752823086, "grad_norm": 2.0955095291137695, "learning_rate": 2.1698113207547173e-06, "loss": 1.2654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 139 }, { "epoch": 0.08782936010037641, "grad_norm": 1.9225586652755737, "learning_rate": 2.1855345911949687e-06, "loss": 1.3794, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 140 }, { "epoch": 0.08845671267252196, "grad_norm": 1.9370880126953125, "learning_rate": 2.2012578616352204e-06, "loss": 1.2875, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 141 }, { "epoch": 0.0890840652446675, "grad_norm": 1.9599952697753906, "learning_rate": 2.2169811320754718e-06, "loss": 1.3845, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 142 }, { "epoch": 0.08971141781681305, "grad_norm": 1.8032351732254028, "learning_rate": 2.2327044025157235e-06, "loss": 1.2829, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 143 }, { "epoch": 0.0903387703889586, "grad_norm": 1.683026671409607, "learning_rate": 2.248427672955975e-06, "loss": 1.3368, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 144 }, { "epoch": 0.09096612296110414, "grad_norm": 1.9588282108306885, "learning_rate": 2.2641509433962266e-06, "loss": 1.5689, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 145 }, { "epoch": 0.09159347553324969, "grad_norm": 1.8645468950271606, "learning_rate": 2.2798742138364784e-06, "loss": 1.4278, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 146 }, { "epoch": 0.09222082810539524, "grad_norm": 1.889079213142395, "learning_rate": 2.2955974842767298e-06, "loss": 1.2831, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 147 }, { "epoch": 0.09284818067754078, "grad_norm": 1.615443468093872, "learning_rate": 2.3113207547169815e-06, "loss": 1.2778, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 148 }, { "epoch": 0.09347553324968633, "grad_norm": 2.068561315536499, "learning_rate": 2.327044025157233e-06, "loss": 1.3043, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 149 }, { "epoch": 0.09410288582183186, "grad_norm": 2.1261227130889893, "learning_rate": 2.3427672955974846e-06, "loss": 1.3721, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 150 }, { "epoch": 0.09473023839397741, "grad_norm": 1.621612310409546, "learning_rate": 2.358490566037736e-06, "loss": 1.4434, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 151 }, { "epoch": 0.09535759096612297, "grad_norm": 1.688284993171692, "learning_rate": 2.3742138364779878e-06, "loss": 1.2438, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 152 }, { "epoch": 0.0959849435382685, "grad_norm": 1.7449889183044434, "learning_rate": 2.389937106918239e-06, "loss": 1.2775, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 153 }, { "epoch": 0.09661229611041405, "grad_norm": 1.9485979080200195, "learning_rate": 2.405660377358491e-06, "loss": 1.3454, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 154 }, { "epoch": 0.0972396486825596, "grad_norm": 1.9239331483840942, "learning_rate": 2.421383647798742e-06, "loss": 1.3666, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 155 }, { "epoch": 0.09786700125470514, "grad_norm": 1.554479956626892, "learning_rate": 2.437106918238994e-06, "loss": 1.4377, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 156 }, { "epoch": 0.09849435382685069, "grad_norm": 1.6431803703308105, "learning_rate": 2.4528301886792453e-06, "loss": 1.3351, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 157 }, { "epoch": 0.09912170639899624, "grad_norm": 2.31290340423584, "learning_rate": 2.468553459119497e-06, "loss": 1.4224, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 158 }, { "epoch": 0.09974905897114178, "grad_norm": 1.8860207796096802, "learning_rate": 2.4842767295597484e-06, "loss": 1.3442, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 159 }, { "epoch": 0.10037641154328733, "grad_norm": 7.125235080718994, "learning_rate": 2.5e-06, "loss": 1.471, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 160 }, { "epoch": 0.10100376411543287, "grad_norm": 1.5071229934692383, "learning_rate": 2.515723270440252e-06, "loss": 1.2873, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 161 }, { "epoch": 0.10163111668757842, "grad_norm": 1.965579867362976, "learning_rate": 2.5314465408805033e-06, "loss": 1.5114, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 162 }, { "epoch": 0.10225846925972397, "grad_norm": 1.856428861618042, "learning_rate": 2.547169811320755e-06, "loss": 1.3347, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 163 }, { "epoch": 0.1028858218318695, "grad_norm": 2.000871419906616, "learning_rate": 2.5628930817610064e-06, "loss": 1.3671, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 164 }, { "epoch": 0.10351317440401506, "grad_norm": 3.1658947467803955, "learning_rate": 2.578616352201258e-06, "loss": 1.1915, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 165 }, { "epoch": 0.10414052697616061, "grad_norm": 1.8465994596481323, "learning_rate": 2.5943396226415095e-06, "loss": 1.5443, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 166 }, { "epoch": 0.10476787954830614, "grad_norm": 7.598590850830078, "learning_rate": 2.6100628930817613e-06, "loss": 1.3744, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 167 }, { "epoch": 0.1053952321204517, "grad_norm": 1.9127484560012817, "learning_rate": 2.6257861635220127e-06, "loss": 1.323, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 168 }, { "epoch": 0.10602258469259725, "grad_norm": 2.021780014038086, "learning_rate": 2.6415094339622644e-06, "loss": 1.3888, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 169 }, { "epoch": 0.10664993726474278, "grad_norm": 1.603609323501587, "learning_rate": 2.6572327044025158e-06, "loss": 1.3605, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 170 }, { "epoch": 0.10727728983688833, "grad_norm": 1.784544587135315, "learning_rate": 2.6729559748427675e-06, "loss": 1.3892, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 171 }, { "epoch": 0.10790464240903387, "grad_norm": 1.7793283462524414, "learning_rate": 2.688679245283019e-06, "loss": 1.4657, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 172 }, { "epoch": 0.10853199498117942, "grad_norm": 1.8067797422409058, "learning_rate": 2.7044025157232706e-06, "loss": 1.3574, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 173 }, { "epoch": 0.10915934755332497, "grad_norm": 1.8207420110702515, "learning_rate": 2.720125786163522e-06, "loss": 1.5282, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 174 }, { "epoch": 0.10978670012547051, "grad_norm": 1.8373732566833496, "learning_rate": 2.7358490566037738e-06, "loss": 1.3553, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 175 }, { "epoch": 0.11041405269761606, "grad_norm": 1.8807525634765625, "learning_rate": 2.7515723270440255e-06, "loss": 1.4968, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 176 }, { "epoch": 0.11104140526976161, "grad_norm": 1.7542294263839722, "learning_rate": 2.767295597484277e-06, "loss": 1.4336, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 177 }, { "epoch": 0.11166875784190715, "grad_norm": 1.7253546714782715, "learning_rate": 2.7830188679245286e-06, "loss": 1.3561, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 178 }, { "epoch": 0.1122961104140527, "grad_norm": 1.9645566940307617, "learning_rate": 2.79874213836478e-06, "loss": 1.3704, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 179 }, { "epoch": 0.11292346298619825, "grad_norm": 1.717736840248108, "learning_rate": 2.8144654088050318e-06, "loss": 1.2457, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 180 }, { "epoch": 0.11355081555834379, "grad_norm": 1.9592612981796265, "learning_rate": 2.830188679245283e-06, "loss": 1.6151, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 181 }, { "epoch": 0.11417816813048934, "grad_norm": 1.8280335664749146, "learning_rate": 2.845911949685535e-06, "loss": 1.2718, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 182 }, { "epoch": 0.11480552070263488, "grad_norm": 1.8632508516311646, "learning_rate": 2.861635220125786e-06, "loss": 1.5193, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 183 }, { "epoch": 0.11543287327478043, "grad_norm": 1.5125095844268799, "learning_rate": 2.877358490566038e-06, "loss": 1.3563, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 184 }, { "epoch": 0.11606022584692598, "grad_norm": 3.884819269180298, "learning_rate": 2.8930817610062893e-06, "loss": 1.4289, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 185 }, { "epoch": 0.11668757841907151, "grad_norm": 2.071526527404785, "learning_rate": 2.908805031446541e-06, "loss": 1.4305, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 186 }, { "epoch": 0.11731493099121706, "grad_norm": 1.6391427516937256, "learning_rate": 2.9245283018867924e-06, "loss": 1.434, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 187 }, { "epoch": 0.11794228356336262, "grad_norm": 1.7116525173187256, "learning_rate": 2.940251572327044e-06, "loss": 1.3891, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 188 }, { "epoch": 0.11856963613550815, "grad_norm": 2.0668036937713623, "learning_rate": 2.9559748427672955e-06, "loss": 1.4199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 189 }, { "epoch": 0.1191969887076537, "grad_norm": 1.774177074432373, "learning_rate": 2.9716981132075473e-06, "loss": 1.3575, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 190 }, { "epoch": 0.11982434127979925, "grad_norm": 1.622582197189331, "learning_rate": 2.987421383647799e-06, "loss": 1.3625, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 191 }, { "epoch": 0.12045169385194479, "grad_norm": 1.9640077352523804, "learning_rate": 3.0031446540880504e-06, "loss": 1.3435, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 192 }, { "epoch": 0.12107904642409034, "grad_norm": 1.9893110990524292, "learning_rate": 3.018867924528302e-06, "loss": 1.3283, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 193 }, { "epoch": 0.12170639899623588, "grad_norm": 1.7824875116348267, "learning_rate": 3.0345911949685535e-06, "loss": 1.3816, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 194 }, { "epoch": 0.12233375156838143, "grad_norm": 1.4995529651641846, "learning_rate": 3.0503144654088053e-06, "loss": 1.2341, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 195 }, { "epoch": 0.12296110414052698, "grad_norm": 1.909799337387085, "learning_rate": 3.0660377358490567e-06, "loss": 1.396, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 196 }, { "epoch": 0.12358845671267252, "grad_norm": 1.9422545433044434, "learning_rate": 3.0817610062893084e-06, "loss": 1.4268, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 197 }, { "epoch": 0.12421580928481807, "grad_norm": 1.5427347421646118, "learning_rate": 3.0974842767295598e-06, "loss": 1.2787, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 198 }, { "epoch": 0.12484316185696362, "grad_norm": 2.090911865234375, "learning_rate": 3.1132075471698115e-06, "loss": 1.3448, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 199 }, { "epoch": 0.12547051442910917, "grad_norm": 1.820373296737671, "learning_rate": 3.128930817610063e-06, "loss": 1.4195, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 200 }, { "epoch": 0.1260978670012547, "grad_norm": 2.1247458457946777, "learning_rate": 3.1446540880503146e-06, "loss": 1.3468, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 201 }, { "epoch": 0.12672521957340024, "grad_norm": 2.0211782455444336, "learning_rate": 3.160377358490566e-06, "loss": 1.2897, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 202 }, { "epoch": 0.1273525721455458, "grad_norm": 2.2064476013183594, "learning_rate": 3.1761006289308178e-06, "loss": 1.3444, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 203 }, { "epoch": 0.12797992471769135, "grad_norm": 1.8434877395629883, "learning_rate": 3.191823899371069e-06, "loss": 1.3776, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 204 }, { "epoch": 0.1286072772898369, "grad_norm": 1.9339540004730225, "learning_rate": 3.207547169811321e-06, "loss": 1.2157, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 205 }, { "epoch": 0.12923462986198245, "grad_norm": 1.8434295654296875, "learning_rate": 3.2232704402515726e-06, "loss": 1.3971, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 206 }, { "epoch": 0.12986198243412797, "grad_norm": 1.5623953342437744, "learning_rate": 3.238993710691824e-06, "loss": 1.3002, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 207 }, { "epoch": 0.13048933500627352, "grad_norm": 1.600899577140808, "learning_rate": 3.2547169811320758e-06, "loss": 1.4586, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 208 }, { "epoch": 0.13111668757841907, "grad_norm": 2.4249284267425537, "learning_rate": 3.270440251572327e-06, "loss": 1.3632, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 209 }, { "epoch": 0.13174404015056462, "grad_norm": 1.8537931442260742, "learning_rate": 3.286163522012579e-06, "loss": 1.3831, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 210 }, { "epoch": 0.13237139272271017, "grad_norm": 1.504931092262268, "learning_rate": 3.30188679245283e-06, "loss": 1.2481, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 211 }, { "epoch": 0.1329987452948557, "grad_norm": 1.8068492412567139, "learning_rate": 3.317610062893082e-06, "loss": 1.3971, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 212 }, { "epoch": 0.13362609786700125, "grad_norm": 1.5803545713424683, "learning_rate": 3.3333333333333333e-06, "loss": 1.3475, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 213 }, { "epoch": 0.1342534504391468, "grad_norm": 1.7603237628936768, "learning_rate": 3.349056603773585e-06, "loss": 1.3331, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 214 }, { "epoch": 0.13488080301129235, "grad_norm": 1.8488463163375854, "learning_rate": 3.3647798742138364e-06, "loss": 1.3273, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 215 }, { "epoch": 0.1355081555834379, "grad_norm": 1.9468002319335938, "learning_rate": 3.380503144654088e-06, "loss": 1.3886, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 216 }, { "epoch": 0.13613550815558345, "grad_norm": 2.5950703620910645, "learning_rate": 3.3962264150943395e-06, "loss": 1.316, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 217 }, { "epoch": 0.13676286072772897, "grad_norm": 1.8121410608291626, "learning_rate": 3.4119496855345913e-06, "loss": 1.3347, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 218 }, { "epoch": 0.13739021329987453, "grad_norm": 1.9044947624206543, "learning_rate": 3.4276729559748427e-06, "loss": 1.4193, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 219 }, { "epoch": 0.13801756587202008, "grad_norm": 1.817305564880371, "learning_rate": 3.4433962264150944e-06, "loss": 1.2194, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 220 }, { "epoch": 0.13864491844416563, "grad_norm": 1.3980709314346313, "learning_rate": 3.4591194968553458e-06, "loss": 1.256, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 221 }, { "epoch": 0.13927227101631118, "grad_norm": 1.7033888101577759, "learning_rate": 3.4748427672955975e-06, "loss": 1.3148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 222 }, { "epoch": 0.1398996235884567, "grad_norm": 1.9811346530914307, "learning_rate": 3.4905660377358493e-06, "loss": 1.2855, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 223 }, { "epoch": 0.14052697616060225, "grad_norm": 1.5731050968170166, "learning_rate": 3.5062893081761007e-06, "loss": 1.3025, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 224 }, { "epoch": 0.1411543287327478, "grad_norm": 1.8436543941497803, "learning_rate": 3.5220125786163524e-06, "loss": 1.3267, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 225 }, { "epoch": 0.14178168130489335, "grad_norm": 1.9756402969360352, "learning_rate": 3.5377358490566038e-06, "loss": 1.403, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 226 }, { "epoch": 0.1424090338770389, "grad_norm": 1.7208794355392456, "learning_rate": 3.5534591194968555e-06, "loss": 1.2933, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 227 }, { "epoch": 0.14303638644918445, "grad_norm": 1.8221158981323242, "learning_rate": 3.569182389937107e-06, "loss": 1.3209, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 228 }, { "epoch": 0.14366373902132998, "grad_norm": 1.9259345531463623, "learning_rate": 3.5849056603773586e-06, "loss": 1.4681, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 229 }, { "epoch": 0.14429109159347553, "grad_norm": 1.8323369026184082, "learning_rate": 3.60062893081761e-06, "loss": 1.3018, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 230 }, { "epoch": 0.14491844416562108, "grad_norm": 1.7952187061309814, "learning_rate": 3.6163522012578618e-06, "loss": 1.411, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 231 }, { "epoch": 0.14554579673776663, "grad_norm": 1.8569618463516235, "learning_rate": 3.632075471698113e-06, "loss": 1.3432, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 232 }, { "epoch": 0.14617314930991218, "grad_norm": 1.9454624652862549, "learning_rate": 3.647798742138365e-06, "loss": 1.3542, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 233 }, { "epoch": 0.1468005018820577, "grad_norm": 1.7847152948379517, "learning_rate": 3.6635220125786162e-06, "loss": 1.4037, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 234 }, { "epoch": 0.14742785445420326, "grad_norm": 1.9342718124389648, "learning_rate": 3.679245283018868e-06, "loss": 1.4753, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 235 }, { "epoch": 0.1480552070263488, "grad_norm": 1.8230245113372803, "learning_rate": 3.6949685534591193e-06, "loss": 1.3496, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 236 }, { "epoch": 0.14868255959849436, "grad_norm": 2.1563334465026855, "learning_rate": 3.710691823899371e-06, "loss": 1.3971, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 237 }, { "epoch": 0.1493099121706399, "grad_norm": 1.9064549207687378, "learning_rate": 3.726415094339623e-06, "loss": 1.3381, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 238 }, { "epoch": 0.14993726474278546, "grad_norm": 1.7572532892227173, "learning_rate": 3.742138364779874e-06, "loss": 1.3343, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 239 }, { "epoch": 0.15056461731493098, "grad_norm": 1.8741847276687622, "learning_rate": 3.7578616352201264e-06, "loss": 1.2935, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 240 }, { "epoch": 0.15119196988707653, "grad_norm": 21.64093589782715, "learning_rate": 3.7735849056603777e-06, "loss": 1.3308, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 241 }, { "epoch": 0.15181932245922208, "grad_norm": 2.0540499687194824, "learning_rate": 3.7893081761006295e-06, "loss": 1.3383, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 242 }, { "epoch": 0.15244667503136763, "grad_norm": 2.0343210697174072, "learning_rate": 3.8050314465408813e-06, "loss": 1.3605, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 243 }, { "epoch": 0.15307402760351319, "grad_norm": 1.7628991603851318, "learning_rate": 3.820754716981133e-06, "loss": 1.2637, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 244 }, { "epoch": 0.1537013801756587, "grad_norm": 1.801661491394043, "learning_rate": 3.836477987421384e-06, "loss": 1.4673, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 245 }, { "epoch": 0.15432873274780426, "grad_norm": 1.7599327564239502, "learning_rate": 3.852201257861636e-06, "loss": 1.3397, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 246 }, { "epoch": 0.1549560853199498, "grad_norm": 1.9988664388656616, "learning_rate": 3.8679245283018875e-06, "loss": 1.2941, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 247 }, { "epoch": 0.15558343789209536, "grad_norm": 1.2556072473526, "learning_rate": 3.883647798742139e-06, "loss": 1.2417, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 248 }, { "epoch": 0.1562107904642409, "grad_norm": 2.0255300998687744, "learning_rate": 3.89937106918239e-06, "loss": 1.455, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 249 }, { "epoch": 0.15683814303638646, "grad_norm": 1.8777093887329102, "learning_rate": 3.915094339622642e-06, "loss": 1.244, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 250 }, { "epoch": 0.15746549560853199, "grad_norm": 2.1528234481811523, "learning_rate": 3.930817610062894e-06, "loss": 1.3446, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 251 }, { "epoch": 0.15809284818067754, "grad_norm": 1.7366938591003418, "learning_rate": 3.9465408805031455e-06, "loss": 1.2959, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 252 }, { "epoch": 0.1587202007528231, "grad_norm": 1.7935433387756348, "learning_rate": 3.962264150943396e-06, "loss": 1.3411, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 253 }, { "epoch": 0.15934755332496864, "grad_norm": 1.8638379573822021, "learning_rate": 3.977987421383648e-06, "loss": 1.3481, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 254 }, { "epoch": 0.1599749058971142, "grad_norm": 1.9182302951812744, "learning_rate": 3.9937106918239e-06, "loss": 1.3058, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 255 }, { "epoch": 0.1606022584692597, "grad_norm": 1.832619309425354, "learning_rate": 4.009433962264152e-06, "loss": 1.352, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 256 }, { "epoch": 0.16122961104140526, "grad_norm": 1.579271674156189, "learning_rate": 4.025157232704403e-06, "loss": 1.2741, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 257 }, { "epoch": 0.1618569636135508, "grad_norm": 1.567428708076477, "learning_rate": 4.040880503144654e-06, "loss": 1.2891, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 258 }, { "epoch": 0.16248431618569636, "grad_norm": 1.7496509552001953, "learning_rate": 4.056603773584906e-06, "loss": 1.2356, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 259 }, { "epoch": 0.16311166875784192, "grad_norm": 1.7312959432601929, "learning_rate": 4.072327044025158e-06, "loss": 1.3896, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 260 }, { "epoch": 0.16373902132998747, "grad_norm": 2.2512001991271973, "learning_rate": 4.08805031446541e-06, "loss": 1.353, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 261 }, { "epoch": 0.164366373902133, "grad_norm": 1.853335976600647, "learning_rate": 4.103773584905661e-06, "loss": 1.5254, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 262 }, { "epoch": 0.16499372647427854, "grad_norm": 1.9108259677886963, "learning_rate": 4.119496855345912e-06, "loss": 1.4425, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 263 }, { "epoch": 0.1656210790464241, "grad_norm": 1.7109558582305908, "learning_rate": 4.135220125786164e-06, "loss": 1.1786, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 264 }, { "epoch": 0.16624843161856964, "grad_norm": 1.9971568584442139, "learning_rate": 4.150943396226416e-06, "loss": 1.4076, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 265 }, { "epoch": 0.1668757841907152, "grad_norm": 1.3846633434295654, "learning_rate": 4.166666666666667e-06, "loss": 1.4011, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 266 }, { "epoch": 0.16750313676286072, "grad_norm": 1.7542074918746948, "learning_rate": 4.182389937106919e-06, "loss": 1.2915, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 267 }, { "epoch": 0.16813048933500627, "grad_norm": 1.7762579917907715, "learning_rate": 4.19811320754717e-06, "loss": 1.3584, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 268 }, { "epoch": 0.16875784190715182, "grad_norm": 1.8659406900405884, "learning_rate": 4.213836477987422e-06, "loss": 1.3343, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 269 }, { "epoch": 0.16938519447929737, "grad_norm": 1.7126916646957397, "learning_rate": 4.229559748427673e-06, "loss": 1.4461, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 270 }, { "epoch": 0.17001254705144292, "grad_norm": 2.4897971153259277, "learning_rate": 4.245283018867925e-06, "loss": 1.3599, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 271 }, { "epoch": 0.17063989962358847, "grad_norm": 1.7607709169387817, "learning_rate": 4.261006289308177e-06, "loss": 1.2669, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 272 }, { "epoch": 0.171267252195734, "grad_norm": 1.936936378479004, "learning_rate": 4.276729559748428e-06, "loss": 1.4449, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 273 }, { "epoch": 0.17189460476787954, "grad_norm": 1.7051970958709717, "learning_rate": 4.29245283018868e-06, "loss": 1.2865, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 274 }, { "epoch": 0.1725219573400251, "grad_norm": 1.7027671337127686, "learning_rate": 4.308176100628931e-06, "loss": 1.2152, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 275 }, { "epoch": 0.17314930991217065, "grad_norm": 1.8415780067443848, "learning_rate": 4.323899371069183e-06, "loss": 1.3654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 276 }, { "epoch": 0.1737766624843162, "grad_norm": 2.0088579654693604, "learning_rate": 4.339622641509435e-06, "loss": 1.416, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 277 }, { "epoch": 0.17440401505646172, "grad_norm": 1.8947542905807495, "learning_rate": 4.355345911949686e-06, "loss": 1.2923, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 278 }, { "epoch": 0.17503136762860727, "grad_norm": 1.9341015815734863, "learning_rate": 4.371069182389937e-06, "loss": 1.3498, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 279 }, { "epoch": 0.17565872020075282, "grad_norm": 1.9172481298446655, "learning_rate": 4.386792452830189e-06, "loss": 1.4128, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 280 }, { "epoch": 0.17628607277289837, "grad_norm": 1.8716827630996704, "learning_rate": 4.402515723270441e-06, "loss": 1.3574, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 281 }, { "epoch": 0.17691342534504392, "grad_norm": 1.814761996269226, "learning_rate": 4.418238993710693e-06, "loss": 1.3448, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 282 }, { "epoch": 0.17754077791718947, "grad_norm": 2.0237176418304443, "learning_rate": 4.4339622641509435e-06, "loss": 1.2693, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 283 }, { "epoch": 0.178168130489335, "grad_norm": 1.7121891975402832, "learning_rate": 4.449685534591195e-06, "loss": 1.2457, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 284 }, { "epoch": 0.17879548306148055, "grad_norm": 1.781483769416809, "learning_rate": 4.465408805031447e-06, "loss": 1.2307, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 285 }, { "epoch": 0.1794228356336261, "grad_norm": 1.9639188051223755, "learning_rate": 4.481132075471699e-06, "loss": 1.2809, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 286 }, { "epoch": 0.18005018820577165, "grad_norm": 2.1548235416412354, "learning_rate": 4.49685534591195e-06, "loss": 1.2848, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 287 }, { "epoch": 0.1806775407779172, "grad_norm": 1.7896254062652588, "learning_rate": 4.5125786163522015e-06, "loss": 1.3772, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 288 }, { "epoch": 0.18130489335006272, "grad_norm": 1.9206440448760986, "learning_rate": 4.528301886792453e-06, "loss": 1.3501, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 289 }, { "epoch": 0.18193224592220827, "grad_norm": 1.570934534072876, "learning_rate": 4.544025157232705e-06, "loss": 1.2086, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 290 }, { "epoch": 0.18255959849435383, "grad_norm": 1.782419204711914, "learning_rate": 4.559748427672957e-06, "loss": 1.3928, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 291 }, { "epoch": 0.18318695106649938, "grad_norm": 2.629718780517578, "learning_rate": 4.575471698113208e-06, "loss": 1.3189, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 292 }, { "epoch": 0.18381430363864493, "grad_norm": 1.9206874370574951, "learning_rate": 4.5911949685534595e-06, "loss": 1.3112, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 293 }, { "epoch": 0.18444165621079048, "grad_norm": 1.8122892379760742, "learning_rate": 4.606918238993711e-06, "loss": 1.2539, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 294 }, { "epoch": 0.185069008782936, "grad_norm": 1.8378740549087524, "learning_rate": 4.622641509433963e-06, "loss": 1.307, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 295 }, { "epoch": 0.18569636135508155, "grad_norm": 1.7635501623153687, "learning_rate": 4.638364779874214e-06, "loss": 1.3125, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 296 }, { "epoch": 0.1863237139272271, "grad_norm": 1.7572214603424072, "learning_rate": 4.654088050314466e-06, "loss": 1.4464, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 297 }, { "epoch": 0.18695106649937265, "grad_norm": 1.8707910776138306, "learning_rate": 4.6698113207547175e-06, "loss": 1.4683, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 298 }, { "epoch": 0.1875784190715182, "grad_norm": 1.9102506637573242, "learning_rate": 4.685534591194969e-06, "loss": 1.3676, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 299 }, { "epoch": 0.18820577164366373, "grad_norm": 1.95762038230896, "learning_rate": 4.70125786163522e-06, "loss": 1.4814, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 300 }, { "epoch": 0.18883312421580928, "grad_norm": 1.8276519775390625, "learning_rate": 4.716981132075472e-06, "loss": 1.3073, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 301 }, { "epoch": 0.18946047678795483, "grad_norm": 1.9433486461639404, "learning_rate": 4.732704402515724e-06, "loss": 1.2964, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 302 }, { "epoch": 0.19008782936010038, "grad_norm": 1.7516218423843384, "learning_rate": 4.7484276729559755e-06, "loss": 1.3246, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 303 }, { "epoch": 0.19071518193224593, "grad_norm": 1.6977726221084595, "learning_rate": 4.764150943396227e-06, "loss": 1.2985, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 304 }, { "epoch": 0.19134253450439148, "grad_norm": 1.5477553606033325, "learning_rate": 4.779874213836478e-06, "loss": 1.2887, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 305 }, { "epoch": 0.191969887076537, "grad_norm": 2.1542649269104004, "learning_rate": 4.79559748427673e-06, "loss": 1.3762, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 306 }, { "epoch": 0.19259723964868256, "grad_norm": 1.8527969121932983, "learning_rate": 4.811320754716982e-06, "loss": 1.2428, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 307 }, { "epoch": 0.1932245922208281, "grad_norm": 2.14410400390625, "learning_rate": 4.8270440251572335e-06, "loss": 1.2875, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 308 }, { "epoch": 0.19385194479297366, "grad_norm": 1.9714504480361938, "learning_rate": 4.842767295597484e-06, "loss": 1.6028, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 309 }, { "epoch": 0.1944792973651192, "grad_norm": 2.064124345779419, "learning_rate": 4.858490566037736e-06, "loss": 1.3189, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 310 }, { "epoch": 0.19510664993726473, "grad_norm": 1.954937219619751, "learning_rate": 4.874213836477988e-06, "loss": 1.3709, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 311 }, { "epoch": 0.19573400250941028, "grad_norm": 1.5165770053863525, "learning_rate": 4.88993710691824e-06, "loss": 1.4661, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 312 }, { "epoch": 0.19636135508155583, "grad_norm": 1.927662968635559, "learning_rate": 4.905660377358491e-06, "loss": 1.2719, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 313 }, { "epoch": 0.19698870765370138, "grad_norm": 1.7934112548828125, "learning_rate": 4.921383647798742e-06, "loss": 1.5374, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 314 }, { "epoch": 0.19761606022584693, "grad_norm": 2.1834499835968018, "learning_rate": 4.937106918238994e-06, "loss": 1.3703, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 315 }, { "epoch": 0.19824341279799249, "grad_norm": 1.7504669427871704, "learning_rate": 4.952830188679246e-06, "loss": 1.3915, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 316 }, { "epoch": 0.198870765370138, "grad_norm": 1.904638409614563, "learning_rate": 4.968553459119497e-06, "loss": 1.2474, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 317 }, { "epoch": 0.19949811794228356, "grad_norm": 1.925824761390686, "learning_rate": 4.984276729559749e-06, "loss": 1.3469, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 318 }, { "epoch": 0.2001254705144291, "grad_norm": 2.1691761016845703, "learning_rate": 5e-06, "loss": 1.2512, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 319 }, { "epoch": 0.20075282308657466, "grad_norm": 1.7544150352478027, "learning_rate": 4.999998502227263e-06, "loss": 1.3736, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 320 }, { "epoch": 0.2013801756587202, "grad_norm": 1.6073588132858276, "learning_rate": 4.9999940089108415e-06, "loss": 1.4896, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 321 }, { "epoch": 0.20200752823086573, "grad_norm": 1.6996064186096191, "learning_rate": 4.999986520056122e-06, "loss": 1.2907, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 322 }, { "epoch": 0.20263488080301129, "grad_norm": 1.7334671020507812, "learning_rate": 4.999976035672078e-06, "loss": 1.3704, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 323 }, { "epoch": 0.20326223337515684, "grad_norm": 1.9705431461334229, "learning_rate": 4.999962555771272e-06, "loss": 1.2347, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 324 }, { "epoch": 0.2038895859473024, "grad_norm": 1.3459328413009644, "learning_rate": 4.999946080369854e-06, "loss": 1.3108, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 325 }, { "epoch": 0.20451693851944794, "grad_norm": 1.8672802448272705, "learning_rate": 4.999926609487568e-06, "loss": 1.3796, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 326 }, { "epoch": 0.2051442910915935, "grad_norm": 2.1950390338897705, "learning_rate": 4.9999041431477425e-06, "loss": 1.2373, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 327 }, { "epoch": 0.205771643663739, "grad_norm": 1.8165322542190552, "learning_rate": 4.999878681377297e-06, "loss": 1.2377, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 328 }, { "epoch": 0.20639899623588456, "grad_norm": 1.840531587600708, "learning_rate": 4.999850224206741e-06, "loss": 1.3705, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 329 }, { "epoch": 0.20702634880803011, "grad_norm": 1.6710518598556519, "learning_rate": 4.999818771670172e-06, "loss": 1.3876, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 330 }, { "epoch": 0.20765370138017566, "grad_norm": 2.126668930053711, "learning_rate": 4.9997843238052776e-06, "loss": 1.3916, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 331 }, { "epoch": 0.20828105395232122, "grad_norm": 1.5642105340957642, "learning_rate": 4.999746880653333e-06, "loss": 1.2617, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 332 }, { "epoch": 0.20890840652446674, "grad_norm": 1.7182477712631226, "learning_rate": 4.999706442259205e-06, "loss": 1.3659, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 333 }, { "epoch": 0.2095357590966123, "grad_norm": 1.906502604484558, "learning_rate": 4.999663008671344e-06, "loss": 1.3145, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 334 }, { "epoch": 0.21016311166875784, "grad_norm": 2.0408036708831787, "learning_rate": 4.999616579941797e-06, "loss": 1.3193, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 335 }, { "epoch": 0.2107904642409034, "grad_norm": 1.7874103784561157, "learning_rate": 4.999567156126193e-06, "loss": 1.308, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 336 }, { "epoch": 0.21141781681304894, "grad_norm": 1.9260340929031372, "learning_rate": 4.999514737283754e-06, "loss": 1.3081, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 337 }, { "epoch": 0.2120451693851945, "grad_norm": 1.7831474542617798, "learning_rate": 4.999459323477288e-06, "loss": 1.2228, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 338 }, { "epoch": 0.21267252195734002, "grad_norm": 1.9563218355178833, "learning_rate": 4.999400914773193e-06, "loss": 1.3612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 339 }, { "epoch": 0.21329987452948557, "grad_norm": 3.149996280670166, "learning_rate": 4.999339511241458e-06, "loss": 1.2988, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 340 }, { "epoch": 0.21392722710163112, "grad_norm": 1.7452430725097656, "learning_rate": 4.999275112955654e-06, "loss": 1.3759, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 341 }, { "epoch": 0.21455457967377667, "grad_norm": 1.843584656715393, "learning_rate": 4.999207719992947e-06, "loss": 1.397, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 342 }, { "epoch": 0.21518193224592222, "grad_norm": 1.9594056606292725, "learning_rate": 4.999137332434086e-06, "loss": 1.2943, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 343 }, { "epoch": 0.21580928481806774, "grad_norm": 1.8866289854049683, "learning_rate": 4.999063950363413e-06, "loss": 1.4503, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 344 }, { "epoch": 0.2164366373902133, "grad_norm": 1.8794231414794922, "learning_rate": 4.9989875738688555e-06, "loss": 1.2822, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 345 }, { "epoch": 0.21706398996235884, "grad_norm": 2.0103776454925537, "learning_rate": 4.998908203041926e-06, "loss": 1.3155, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 346 }, { "epoch": 0.2176913425345044, "grad_norm": 1.9219310283660889, "learning_rate": 4.9988258379777334e-06, "loss": 1.3347, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 347 }, { "epoch": 0.21831869510664995, "grad_norm": 1.6140512228012085, "learning_rate": 4.998740478774965e-06, "loss": 1.2699, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 348 }, { "epoch": 0.2189460476787955, "grad_norm": 1.647599220275879, "learning_rate": 4.998652125535901e-06, "loss": 1.3667, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 349 }, { "epoch": 0.21957340025094102, "grad_norm": 1.810556173324585, "learning_rate": 4.998560778366407e-06, "loss": 1.4081, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 350 }, { "epoch": 0.22020075282308657, "grad_norm": 1.6720383167266846, "learning_rate": 4.998466437375938e-06, "loss": 1.309, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 351 }, { "epoch": 0.22082810539523212, "grad_norm": 1.6353473663330078, "learning_rate": 4.998369102677535e-06, "loss": 1.2393, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 352 }, { "epoch": 0.22145545796737767, "grad_norm": 1.7175465822219849, "learning_rate": 4.9982687743878256e-06, "loss": 1.318, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 353 }, { "epoch": 0.22208281053952322, "grad_norm": 1.627525806427002, "learning_rate": 4.998165452627025e-06, "loss": 1.2159, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 354 }, { "epoch": 0.22271016311166875, "grad_norm": 1.8175550699234009, "learning_rate": 4.998059137518936e-06, "loss": 1.2853, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 355 }, { "epoch": 0.2233375156838143, "grad_norm": 2.5346550941467285, "learning_rate": 4.997949829190947e-06, "loss": 1.3752, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 356 }, { "epoch": 0.22396486825595985, "grad_norm": 1.8762712478637695, "learning_rate": 4.997837527774033e-06, "loss": 1.3113, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 357 }, { "epoch": 0.2245922208281054, "grad_norm": 1.645166277885437, "learning_rate": 4.997722233402755e-06, "loss": 1.3383, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 358 }, { "epoch": 0.22521957340025095, "grad_norm": 1.9214521646499634, "learning_rate": 4.997603946215262e-06, "loss": 1.1769, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 359 }, { "epoch": 0.2258469259723965, "grad_norm": 1.7410590648651123, "learning_rate": 4.997482666353287e-06, "loss": 1.2254, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 360 }, { "epoch": 0.22647427854454202, "grad_norm": 2.080554723739624, "learning_rate": 4.99735839396215e-06, "loss": 1.2844, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 361 }, { "epoch": 0.22710163111668757, "grad_norm": 1.9182201623916626, "learning_rate": 4.997231129190757e-06, "loss": 1.3778, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 362 }, { "epoch": 0.22772898368883313, "grad_norm": 2.0027408599853516, "learning_rate": 4.997100872191598e-06, "loss": 1.3227, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 363 }, { "epoch": 0.22835633626097868, "grad_norm": 1.7964129447937012, "learning_rate": 4.99696762312075e-06, "loss": 1.3391, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 364 }, { "epoch": 0.22898368883312423, "grad_norm": 1.8464184999465942, "learning_rate": 4.996831382137873e-06, "loss": 1.4158, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 365 }, { "epoch": 0.22961104140526975, "grad_norm": 1.7289762496948242, "learning_rate": 4.996692149406216e-06, "loss": 1.3439, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 366 }, { "epoch": 0.2302383939774153, "grad_norm": 1.4288086891174316, "learning_rate": 4.996549925092609e-06, "loss": 1.2174, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 367 }, { "epoch": 0.23086574654956085, "grad_norm": 1.8290668725967407, "learning_rate": 4.996404709367466e-06, "loss": 1.3256, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 368 }, { "epoch": 0.2314930991217064, "grad_norm": 1.6571376323699951, "learning_rate": 4.99625650240479e-06, "loss": 1.2822, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 369 }, { "epoch": 0.23212045169385195, "grad_norm": 1.8437656164169312, "learning_rate": 4.9961053043821636e-06, "loss": 1.3772, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 370 }, { "epoch": 0.2327478042659975, "grad_norm": 1.6965371370315552, "learning_rate": 4.9959511154807555e-06, "loss": 1.2846, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 371 }, { "epoch": 0.23337515683814303, "grad_norm": 1.8487476110458374, "learning_rate": 4.995793935885317e-06, "loss": 1.3123, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 372 }, { "epoch": 0.23400250941028858, "grad_norm": 1.8760329484939575, "learning_rate": 4.995633765784185e-06, "loss": 1.2402, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 373 }, { "epoch": 0.23462986198243413, "grad_norm": 1.8349744081497192, "learning_rate": 4.9954706053692766e-06, "loss": 1.2657, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 374 }, { "epoch": 0.23525721455457968, "grad_norm": 1.6800428628921509, "learning_rate": 4.995304454836095e-06, "loss": 1.2008, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 375 }, { "epoch": 0.23588456712672523, "grad_norm": 1.647479772567749, "learning_rate": 4.9951353143837235e-06, "loss": 1.2184, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 376 }, { "epoch": 0.23651191969887075, "grad_norm": 1.886382818222046, "learning_rate": 4.99496318421483e-06, "loss": 1.3817, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 377 }, { "epoch": 0.2371392722710163, "grad_norm": 1.8178256750106812, "learning_rate": 4.9947880645356636e-06, "loss": 1.4071, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 378 }, { "epoch": 0.23776662484316186, "grad_norm": 1.8750951290130615, "learning_rate": 4.994609955556057e-06, "loss": 1.2923, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 379 }, { "epoch": 0.2383939774153074, "grad_norm": 1.7878093719482422, "learning_rate": 4.994428857489422e-06, "loss": 1.3081, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 380 }, { "epoch": 0.23902132998745296, "grad_norm": 2.013211488723755, "learning_rate": 4.994244770552755e-06, "loss": 1.4391, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 381 }, { "epoch": 0.2396486825595985, "grad_norm": 1.7840601205825806, "learning_rate": 4.994057694966632e-06, "loss": 1.283, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 382 }, { "epoch": 0.24027603513174403, "grad_norm": 1.9290210008621216, "learning_rate": 4.993867630955209e-06, "loss": 1.2982, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 383 }, { "epoch": 0.24090338770388958, "grad_norm": 1.8992472887039185, "learning_rate": 4.993674578746225e-06, "loss": 1.2167, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 384 }, { "epoch": 0.24153074027603513, "grad_norm": 2.0222549438476562, "learning_rate": 4.993478538571e-06, "loss": 1.3329, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 385 }, { "epoch": 0.24215809284818068, "grad_norm": 1.8096097707748413, "learning_rate": 4.99327951066443e-06, "loss": 1.4154, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 386 }, { "epoch": 0.24278544542032623, "grad_norm": 1.4684665203094482, "learning_rate": 4.9930774952649975e-06, "loss": 1.3255, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 387 }, { "epoch": 0.24341279799247176, "grad_norm": 1.7699239253997803, "learning_rate": 4.9928724926147586e-06, "loss": 1.3138, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 388 }, { "epoch": 0.2440401505646173, "grad_norm": 1.8916990756988525, "learning_rate": 4.992664502959351e-06, "loss": 1.5344, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 389 }, { "epoch": 0.24466750313676286, "grad_norm": 2.013256311416626, "learning_rate": 4.992453526547993e-06, "loss": 1.4169, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 390 }, { "epoch": 0.2452948557089084, "grad_norm": 1.911051630973816, "learning_rate": 4.9922395636334806e-06, "loss": 1.3485, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 391 }, { "epoch": 0.24592220828105396, "grad_norm": 2.74343204498291, "learning_rate": 4.992022614472187e-06, "loss": 1.25, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 392 }, { "epoch": 0.2465495608531995, "grad_norm": 1.8582487106323242, "learning_rate": 4.991802679324064e-06, "loss": 1.3874, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 393 }, { "epoch": 0.24717691342534504, "grad_norm": 1.8683292865753174, "learning_rate": 4.991579758452644e-06, "loss": 1.3666, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 394 }, { "epoch": 0.24780426599749059, "grad_norm": 1.898242712020874, "learning_rate": 4.991353852125032e-06, "loss": 1.2622, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 395 }, { "epoch": 0.24843161856963614, "grad_norm": 1.793607234954834, "learning_rate": 4.991124960611916e-06, "loss": 1.3744, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 396 }, { "epoch": 0.2490589711417817, "grad_norm": 1.8810088634490967, "learning_rate": 4.990893084187557e-06, "loss": 1.3638, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 397 }, { "epoch": 0.24968632371392724, "grad_norm": 1.5801982879638672, "learning_rate": 4.990658223129793e-06, "loss": 1.3114, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 398 }, { "epoch": 0.25031367628607276, "grad_norm": 1.5382050275802612, "learning_rate": 4.9904203777200375e-06, "loss": 1.2568, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 399 }, { "epoch": 0.25094102885821834, "grad_norm": 1.5019408464431763, "learning_rate": 4.990179548243285e-06, "loss": 1.3077, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 400 }, { "epoch": 0.25156838143036386, "grad_norm": 1.5238393545150757, "learning_rate": 4.989935734988098e-06, "loss": 1.255, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 401 }, { "epoch": 0.2521957340025094, "grad_norm": 1.8959760665893555, "learning_rate": 4.98968893824662e-06, "loss": 1.2656, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 402 }, { "epoch": 0.25282308657465496, "grad_norm": 1.4434200525283813, "learning_rate": 4.989439158314566e-06, "loss": 1.1504, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 403 }, { "epoch": 0.2534504391468005, "grad_norm": 1.9673125743865967, "learning_rate": 4.989186395491229e-06, "loss": 1.4144, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 404 }, { "epoch": 0.25407779171894607, "grad_norm": 1.916951060295105, "learning_rate": 4.988930650079472e-06, "loss": 1.257, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 405 }, { "epoch": 0.2547051442910916, "grad_norm": 1.7569973468780518, "learning_rate": 4.988671922385735e-06, "loss": 1.4437, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 406 }, { "epoch": 0.2553324968632371, "grad_norm": 1.8653000593185425, "learning_rate": 4.988410212720029e-06, "loss": 1.2732, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 407 }, { "epoch": 0.2559598494353827, "grad_norm": 2.0402140617370605, "learning_rate": 4.9881455213959405e-06, "loss": 1.3318, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 408 }, { "epoch": 0.2565872020075282, "grad_norm": 1.4472616910934448, "learning_rate": 4.987877848730627e-06, "loss": 1.2535, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 409 }, { "epoch": 0.2572145545796738, "grad_norm": 1.8016327619552612, "learning_rate": 4.9876071950448185e-06, "loss": 1.4344, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 410 }, { "epoch": 0.2578419071518193, "grad_norm": 1.69692063331604, "learning_rate": 4.987333560662817e-06, "loss": 1.2204, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 411 }, { "epoch": 0.2584692597239649, "grad_norm": 1.9340769052505493, "learning_rate": 4.987056945912498e-06, "loss": 1.3763, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 412 }, { "epoch": 0.2590966122961104, "grad_norm": 1.7981717586517334, "learning_rate": 4.986777351125304e-06, "loss": 1.2048, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 413 }, { "epoch": 0.25972396486825594, "grad_norm": 8.244365692138672, "learning_rate": 4.986494776636251e-06, "loss": 1.4393, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 414 }, { "epoch": 0.2603513174404015, "grad_norm": 1.6280755996704102, "learning_rate": 4.986209222783925e-06, "loss": 1.3547, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 415 }, { "epoch": 0.26097867001254704, "grad_norm": 1.932045817375183, "learning_rate": 4.985920689910484e-06, "loss": 1.3862, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 416 }, { "epoch": 0.2616060225846926, "grad_norm": 2.241225004196167, "learning_rate": 4.98562917836165e-06, "loss": 1.2758, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 417 }, { "epoch": 0.26223337515683814, "grad_norm": 1.9528926610946655, "learning_rate": 4.9853346884867195e-06, "loss": 1.5007, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 418 }, { "epoch": 0.26286072772898367, "grad_norm": 2.021772861480713, "learning_rate": 4.985037220638556e-06, "loss": 1.3199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 419 }, { "epoch": 0.26348808030112925, "grad_norm": 2.033529758453369, "learning_rate": 4.984736775173588e-06, "loss": 1.388, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 420 }, { "epoch": 0.26411543287327477, "grad_norm": 2.2411134243011475, "learning_rate": 4.984433352451818e-06, "loss": 1.2186, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 421 }, { "epoch": 0.26474278544542035, "grad_norm": 1.808010458946228, "learning_rate": 4.984126952836811e-06, "loss": 1.3176, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 422 }, { "epoch": 0.26537013801756587, "grad_norm": 1.856432318687439, "learning_rate": 4.983817576695701e-06, "loss": 1.2231, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 423 }, { "epoch": 0.2659974905897114, "grad_norm": 1.4289281368255615, "learning_rate": 4.983505224399188e-06, "loss": 1.3955, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 424 }, { "epoch": 0.266624843161857, "grad_norm": 1.582891821861267, "learning_rate": 4.9831898963215386e-06, "loss": 1.2563, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 425 }, { "epoch": 0.2672521957340025, "grad_norm": 1.8547396659851074, "learning_rate": 4.982871592840583e-06, "loss": 1.3904, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 426 }, { "epoch": 0.2678795483061481, "grad_norm": 1.6647510528564453, "learning_rate": 4.982550314337721e-06, "loss": 1.2344, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 427 }, { "epoch": 0.2685069008782936, "grad_norm": 1.7917332649230957, "learning_rate": 4.9822260611979125e-06, "loss": 1.4628, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 428 }, { "epoch": 0.2691342534504391, "grad_norm": 1.7590855360031128, "learning_rate": 4.981898833809684e-06, "loss": 1.3563, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 429 }, { "epoch": 0.2697616060225847, "grad_norm": 1.706697702407837, "learning_rate": 4.981568632565125e-06, "loss": 1.2132, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 430 }, { "epoch": 0.2703889585947302, "grad_norm": 1.829269528388977, "learning_rate": 4.9812354578598876e-06, "loss": 1.4478, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 431 }, { "epoch": 0.2710163111668758, "grad_norm": 1.4316096305847168, "learning_rate": 4.98089931009319e-06, "loss": 1.2116, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 432 }, { "epoch": 0.2716436637390213, "grad_norm": 1.522438645362854, "learning_rate": 4.98056018966781e-06, "loss": 1.2716, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 433 }, { "epoch": 0.2722710163111669, "grad_norm": 1.749516248703003, "learning_rate": 4.980218096990087e-06, "loss": 1.3609, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 434 }, { "epoch": 0.2728983688833124, "grad_norm": 2.074244499206543, "learning_rate": 4.979873032469923e-06, "loss": 1.1601, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 435 }, { "epoch": 0.27352572145545795, "grad_norm": 2.087233066558838, "learning_rate": 4.979524996520781e-06, "loss": 1.3867, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 436 }, { "epoch": 0.2741530740276035, "grad_norm": 2.0224671363830566, "learning_rate": 4.979173989559684e-06, "loss": 1.4594, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 437 }, { "epoch": 0.27478042659974905, "grad_norm": 1.8196134567260742, "learning_rate": 4.978820012007213e-06, "loss": 1.3352, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 438 }, { "epoch": 0.27540777917189463, "grad_norm": 2.074153423309326, "learning_rate": 4.978463064287513e-06, "loss": 1.4756, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 439 }, { "epoch": 0.27603513174404015, "grad_norm": 1.6791142225265503, "learning_rate": 4.978103146828285e-06, "loss": 1.326, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 440 }, { "epoch": 0.2766624843161857, "grad_norm": 2.263253688812256, "learning_rate": 4.977740260060787e-06, "loss": 1.5105, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 441 }, { "epoch": 0.27728983688833125, "grad_norm": 1.7390024662017822, "learning_rate": 4.977374404419838e-06, "loss": 1.1394, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 442 }, { "epoch": 0.2779171894604768, "grad_norm": 1.5287703275680542, "learning_rate": 4.977005580343811e-06, "loss": 1.4403, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 443 }, { "epoch": 0.27854454203262236, "grad_norm": 1.9817328453063965, "learning_rate": 4.9766337882746395e-06, "loss": 1.3576, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 444 }, { "epoch": 0.2791718946047679, "grad_norm": 3.146590232849121, "learning_rate": 4.976259028657812e-06, "loss": 1.394, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 445 }, { "epoch": 0.2797992471769134, "grad_norm": 1.9648598432540894, "learning_rate": 4.97588130194237e-06, "loss": 1.4508, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 446 }, { "epoch": 0.280426599749059, "grad_norm": 1.9081323146820068, "learning_rate": 4.975500608580914e-06, "loss": 1.3358, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 447 }, { "epoch": 0.2810539523212045, "grad_norm": 1.8519835472106934, "learning_rate": 4.975116949029598e-06, "loss": 1.3292, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 448 }, { "epoch": 0.2816813048933501, "grad_norm": 1.6948096752166748, "learning_rate": 4.974730323748129e-06, "loss": 1.1885, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 449 }, { "epoch": 0.2823086574654956, "grad_norm": 1.8216960430145264, "learning_rate": 4.9743407331997696e-06, "loss": 1.3733, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 450 }, { "epoch": 0.28293601003764113, "grad_norm": 1.7822445631027222, "learning_rate": 4.973948177851333e-06, "loss": 1.4895, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 451 }, { "epoch": 0.2835633626097867, "grad_norm": 1.525521159172058, "learning_rate": 4.973552658173186e-06, "loss": 1.1041, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 452 }, { "epoch": 0.28419071518193223, "grad_norm": 1.5609532594680786, "learning_rate": 4.973154174639249e-06, "loss": 1.2549, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 453 }, { "epoch": 0.2848180677540778, "grad_norm": 1.5253825187683105, "learning_rate": 4.972752727726992e-06, "loss": 1.2703, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 454 }, { "epoch": 0.28544542032622333, "grad_norm": 1.5149940252304077, "learning_rate": 4.972348317917434e-06, "loss": 1.2457, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 455 }, { "epoch": 0.2860727728983689, "grad_norm": 1.466367483139038, "learning_rate": 4.971940945695148e-06, "loss": 1.2483, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 456 }, { "epoch": 0.28670012547051443, "grad_norm": 2.159257650375366, "learning_rate": 4.971530611548254e-06, "loss": 1.2765, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 457 }, { "epoch": 0.28732747804265996, "grad_norm": 1.6494646072387695, "learning_rate": 4.971117315968423e-06, "loss": 1.263, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 458 }, { "epoch": 0.28795483061480553, "grad_norm": 1.8701457977294922, "learning_rate": 4.970701059450872e-06, "loss": 1.392, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 459 }, { "epoch": 0.28858218318695106, "grad_norm": 1.613142490386963, "learning_rate": 4.970281842494368e-06, "loss": 1.1785, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 460 }, { "epoch": 0.28920953575909664, "grad_norm": 1.840915560722351, "learning_rate": 4.969859665601223e-06, "loss": 1.3366, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 461 }, { "epoch": 0.28983688833124216, "grad_norm": 1.8302453756332397, "learning_rate": 4.969434529277299e-06, "loss": 1.4625, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 462 }, { "epoch": 0.2904642409033877, "grad_norm": 1.698702096939087, "learning_rate": 4.969006434032e-06, "loss": 1.3302, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 463 }, { "epoch": 0.29109159347553326, "grad_norm": 2.026204824447632, "learning_rate": 4.96857538037828e-06, "loss": 1.2724, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 464 }, { "epoch": 0.2917189460476788, "grad_norm": 1.7603907585144043, "learning_rate": 4.968141368832633e-06, "loss": 1.2931, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 465 }, { "epoch": 0.29234629861982436, "grad_norm": 2.7061665058135986, "learning_rate": 4.9677043999151e-06, "loss": 1.3099, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 466 }, { "epoch": 0.2929736511919699, "grad_norm": 1.795343041419983, "learning_rate": 4.967264474149267e-06, "loss": 1.2425, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 467 }, { "epoch": 0.2936010037641154, "grad_norm": 1.4162352085113525, "learning_rate": 4.966821592062259e-06, "loss": 1.3789, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 468 }, { "epoch": 0.294228356336261, "grad_norm": 1.8810973167419434, "learning_rate": 4.966375754184746e-06, "loss": 1.2759, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 469 }, { "epoch": 0.2948557089084065, "grad_norm": 1.919083833694458, "learning_rate": 4.965926961050939e-06, "loss": 1.2544, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 470 }, { "epoch": 0.2954830614805521, "grad_norm": 1.7765024900436401, "learning_rate": 4.965475213198589e-06, "loss": 1.2828, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 471 }, { "epoch": 0.2961104140526976, "grad_norm": 1.5920195579528809, "learning_rate": 4.965020511168991e-06, "loss": 1.3994, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 472 }, { "epoch": 0.29673776662484314, "grad_norm": 1.7678951025009155, "learning_rate": 4.964562855506976e-06, "loss": 1.3297, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 473 }, { "epoch": 0.2973651191969887, "grad_norm": 1.9965953826904297, "learning_rate": 4.964102246760915e-06, "loss": 1.271, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 474 }, { "epoch": 0.29799247176913424, "grad_norm": 2.709869623184204, "learning_rate": 4.963638685482717e-06, "loss": 1.2818, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 475 }, { "epoch": 0.2986198243412798, "grad_norm": 1.530746579170227, "learning_rate": 4.963172172227831e-06, "loss": 1.2644, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 476 }, { "epoch": 0.29924717691342534, "grad_norm": 1.8586333990097046, "learning_rate": 4.9627027075552425e-06, "loss": 1.3388, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 477 }, { "epoch": 0.2998745294855709, "grad_norm": 1.7000927925109863, "learning_rate": 4.96223029202747e-06, "loss": 1.3127, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 478 }, { "epoch": 0.30050188205771644, "grad_norm": 1.7719290256500244, "learning_rate": 4.961754926210572e-06, "loss": 1.2862, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 479 }, { "epoch": 0.30112923462986196, "grad_norm": 2.3949620723724365, "learning_rate": 4.961276610674141e-06, "loss": 1.2543, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 480 }, { "epoch": 0.30175658720200754, "grad_norm": 1.7187825441360474, "learning_rate": 4.960795345991302e-06, "loss": 1.1969, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 481 }, { "epoch": 0.30238393977415307, "grad_norm": 1.8787022829055786, "learning_rate": 4.960311132738715e-06, "loss": 1.2647, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 482 }, { "epoch": 0.30301129234629864, "grad_norm": 1.5814067125320435, "learning_rate": 4.959823971496575e-06, "loss": 1.2246, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 483 }, { "epoch": 0.30363864491844417, "grad_norm": 1.7932416200637817, "learning_rate": 4.959333862848605e-06, "loss": 1.3038, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 484 }, { "epoch": 0.3042659974905897, "grad_norm": 2.22336483001709, "learning_rate": 4.958840807382064e-06, "loss": 1.3414, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 485 }, { "epoch": 0.30489335006273527, "grad_norm": 1.9425820112228394, "learning_rate": 4.958344805687739e-06, "loss": 1.2395, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 486 }, { "epoch": 0.3055207026348808, "grad_norm": 1.7737921476364136, "learning_rate": 4.9578458583599495e-06, "loss": 1.2689, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 487 }, { "epoch": 0.30614805520702637, "grad_norm": 1.9404109716415405, "learning_rate": 4.957343965996542e-06, "loss": 1.302, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 488 }, { "epoch": 0.3067754077791719, "grad_norm": 1.6181503534317017, "learning_rate": 4.956839129198892e-06, "loss": 1.3497, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 489 }, { "epoch": 0.3074027603513174, "grad_norm": 2.0537965297698975, "learning_rate": 4.956331348571907e-06, "loss": 1.3725, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 490 }, { "epoch": 0.308030112923463, "grad_norm": 1.6368733644485474, "learning_rate": 4.955820624724018e-06, "loss": 1.2585, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 491 }, { "epoch": 0.3086574654956085, "grad_norm": 1.745303750038147, "learning_rate": 4.955306958267183e-06, "loss": 1.2816, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 492 }, { "epoch": 0.3092848180677541, "grad_norm": 1.9312869310379028, "learning_rate": 4.954790349816886e-06, "loss": 1.2817, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 493 }, { "epoch": 0.3099121706398996, "grad_norm": 1.7578679323196411, "learning_rate": 4.954270799992138e-06, "loss": 1.3172, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 494 }, { "epoch": 0.31053952321204514, "grad_norm": 1.7783771753311157, "learning_rate": 4.953748309415472e-06, "loss": 1.3384, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 495 }, { "epoch": 0.3111668757841907, "grad_norm": 1.5775705575942993, "learning_rate": 4.9532228787129456e-06, "loss": 1.1949, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 496 }, { "epoch": 0.31179422835633624, "grad_norm": 1.4879812002182007, "learning_rate": 4.9526945085141405e-06, "loss": 1.2281, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 497 }, { "epoch": 0.3124215809284818, "grad_norm": 1.920745611190796, "learning_rate": 4.952163199452159e-06, "loss": 1.3227, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 498 }, { "epoch": 0.31304893350062735, "grad_norm": 1.9175952672958374, "learning_rate": 4.951628952163625e-06, "loss": 1.359, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 499 }, { "epoch": 0.3136762860727729, "grad_norm": 1.4619158506393433, "learning_rate": 4.951091767288683e-06, "loss": 1.2466, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 500 }, { "epoch": 0.31430363864491845, "grad_norm": 1.8956916332244873, "learning_rate": 4.950551645470998e-06, "loss": 1.3183, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 501 }, { "epoch": 0.31493099121706397, "grad_norm": 1.4829291105270386, "learning_rate": 4.950008587357755e-06, "loss": 1.2069, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 502 }, { "epoch": 0.31555834378920955, "grad_norm": 2.078758955001831, "learning_rate": 4.949462593599654e-06, "loss": 1.2592, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 503 }, { "epoch": 0.3161856963613551, "grad_norm": 1.880047082901001, "learning_rate": 4.948913664850917e-06, "loss": 1.2492, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 504 }, { "epoch": 0.31681304893350065, "grad_norm": 1.900512933731079, "learning_rate": 4.9483618017692784e-06, "loss": 1.2715, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 505 }, { "epoch": 0.3174404015056462, "grad_norm": 1.6285508871078491, "learning_rate": 4.947807005015991e-06, "loss": 1.2036, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 506 }, { "epoch": 0.3180677540777917, "grad_norm": 1.93606436252594, "learning_rate": 4.947249275255823e-06, "loss": 1.372, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 507 }, { "epoch": 0.3186951066499373, "grad_norm": 1.9985902309417725, "learning_rate": 4.9466886131570565e-06, "loss": 1.3982, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 508 }, { "epoch": 0.3193224592220828, "grad_norm": 1.8069493770599365, "learning_rate": 4.946125019391486e-06, "loss": 1.3255, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 509 }, { "epoch": 0.3199498117942284, "grad_norm": 1.8266329765319824, "learning_rate": 4.94555849463442e-06, "loss": 1.2351, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 510 }, { "epoch": 0.3205771643663739, "grad_norm": 2.004852056503296, "learning_rate": 4.94498903956468e-06, "loss": 1.2493, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 511 }, { "epoch": 0.3212045169385194, "grad_norm": 1.9460428953170776, "learning_rate": 4.944416654864596e-06, "loss": 1.4126, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 512 }, { "epoch": 0.321831869510665, "grad_norm": 1.6291266679763794, "learning_rate": 4.943841341220011e-06, "loss": 1.4045, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 513 }, { "epoch": 0.3224592220828105, "grad_norm": 1.5005308389663696, "learning_rate": 4.943263099320275e-06, "loss": 1.4401, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 514 }, { "epoch": 0.3230865746549561, "grad_norm": 1.3146636486053467, "learning_rate": 4.942681929858249e-06, "loss": 1.3803, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 515 }, { "epoch": 0.3237139272271016, "grad_norm": 1.5461806058883667, "learning_rate": 4.942097833530299e-06, "loss": 1.2672, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 516 }, { "epoch": 0.32434127979924715, "grad_norm": 1.8553651571273804, "learning_rate": 4.9415108110363025e-06, "loss": 1.3967, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 517 }, { "epoch": 0.32496863237139273, "grad_norm": 1.5972548723220825, "learning_rate": 4.940920863079639e-06, "loss": 1.2384, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 518 }, { "epoch": 0.32559598494353825, "grad_norm": 1.6900385618209839, "learning_rate": 4.940327990367196e-06, "loss": 1.2429, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 519 }, { "epoch": 0.32622333751568383, "grad_norm": 1.8452892303466797, "learning_rate": 4.939732193609361e-06, "loss": 1.3469, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 520 }, { "epoch": 0.32685069008782935, "grad_norm": 1.7989273071289062, "learning_rate": 4.939133473520033e-06, "loss": 1.3273, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 521 }, { "epoch": 0.32747804265997493, "grad_norm": 1.6270055770874023, "learning_rate": 4.9385318308166065e-06, "loss": 1.2904, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 522 }, { "epoch": 0.32810539523212046, "grad_norm": 1.8746994733810425, "learning_rate": 4.937927266219982e-06, "loss": 1.4421, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 523 }, { "epoch": 0.328732747804266, "grad_norm": 1.5117107629776, "learning_rate": 4.937319780454559e-06, "loss": 1.2808, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 524 }, { "epoch": 0.32936010037641156, "grad_norm": 2.1387412548065186, "learning_rate": 4.936709374248238e-06, "loss": 1.1458, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 525 }, { "epoch": 0.3299874529485571, "grad_norm": 1.901404857635498, "learning_rate": 4.936096048332421e-06, "loss": 1.418, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 526 }, { "epoch": 0.33061480552070266, "grad_norm": 11.465993881225586, "learning_rate": 4.935479803442002e-06, "loss": 1.325, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 527 }, { "epoch": 0.3312421580928482, "grad_norm": 1.0712916851043701, "learning_rate": 4.934860640315381e-06, "loss": 1.1727, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 528 }, { "epoch": 0.3318695106649937, "grad_norm": 1.8514409065246582, "learning_rate": 4.934238559694448e-06, "loss": 1.3749, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 529 }, { "epoch": 0.3324968632371393, "grad_norm": 1.7288488149642944, "learning_rate": 4.933613562324592e-06, "loss": 1.2642, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 530 }, { "epoch": 0.3331242158092848, "grad_norm": 1.8730841875076294, "learning_rate": 4.932985648954696e-06, "loss": 1.3008, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 531 }, { "epoch": 0.3337515683814304, "grad_norm": 1.5771578550338745, "learning_rate": 4.932354820337138e-06, "loss": 1.2411, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 532 }, { "epoch": 0.3343789209535759, "grad_norm": 1.9279652833938599, "learning_rate": 4.931721077227788e-06, "loss": 1.209, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 533 }, { "epoch": 0.33500627352572143, "grad_norm": 2.032771110534668, "learning_rate": 4.931084420386009e-06, "loss": 1.2686, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 534 }, { "epoch": 0.335633626097867, "grad_norm": 1.446815013885498, "learning_rate": 4.9304448505746535e-06, "loss": 1.2038, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 535 }, { "epoch": 0.33626097867001253, "grad_norm": 2.149000644683838, "learning_rate": 4.929802368560066e-06, "loss": 1.2825, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 536 }, { "epoch": 0.3368883312421581, "grad_norm": 1.8099359273910522, "learning_rate": 4.929156975112081e-06, "loss": 1.2651, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 537 }, { "epoch": 0.33751568381430364, "grad_norm": 2.0547146797180176, "learning_rate": 4.92850867100402e-06, "loss": 1.3422, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 538 }, { "epoch": 0.33814303638644916, "grad_norm": 1.6774853467941284, "learning_rate": 4.927857457012693e-06, "loss": 1.4274, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 539 }, { "epoch": 0.33877038895859474, "grad_norm": 1.6937648057937622, "learning_rate": 4.927203333918396e-06, "loss": 1.2117, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 540 }, { "epoch": 0.33939774153074026, "grad_norm": 1.93661630153656, "learning_rate": 4.926546302504912e-06, "loss": 1.2139, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 541 }, { "epoch": 0.34002509410288584, "grad_norm": 2.4098830223083496, "learning_rate": 4.925886363559507e-06, "loss": 1.2046, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 542 }, { "epoch": 0.34065244667503136, "grad_norm": 1.7324174642562866, "learning_rate": 4.925223517872934e-06, "loss": 1.3581, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 543 }, { "epoch": 0.34127979924717694, "grad_norm": 2.022440195083618, "learning_rate": 4.924557766239424e-06, "loss": 1.246, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 544 }, { "epoch": 0.34190715181932246, "grad_norm": 1.8990328311920166, "learning_rate": 4.923889109456693e-06, "loss": 1.3062, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 545 }, { "epoch": 0.342534504391468, "grad_norm": 2.065661668777466, "learning_rate": 4.923217548325939e-06, "loss": 1.2956, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 546 }, { "epoch": 0.34316185696361357, "grad_norm": 1.897823691368103, "learning_rate": 4.92254308365184e-06, "loss": 1.2868, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 547 }, { "epoch": 0.3437892095357591, "grad_norm": 1.8894264698028564, "learning_rate": 4.9218657162425496e-06, "loss": 1.2409, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 548 }, { "epoch": 0.34441656210790467, "grad_norm": 1.9177500009536743, "learning_rate": 4.921185446909702e-06, "loss": 1.3804, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 549 }, { "epoch": 0.3450439146800502, "grad_norm": 1.6005748510360718, "learning_rate": 4.920502276468408e-06, "loss": 1.1235, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 550 }, { "epoch": 0.3456712672521957, "grad_norm": 1.9727932214736938, "learning_rate": 4.919816205737257e-06, "loss": 1.2998, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 551 }, { "epoch": 0.3462986198243413, "grad_norm": 1.958331823348999, "learning_rate": 4.919127235538309e-06, "loss": 1.1596, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 552 }, { "epoch": 0.3469259723964868, "grad_norm": 1.9714314937591553, "learning_rate": 4.918435366697102e-06, "loss": 1.2857, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 553 }, { "epoch": 0.3475533249686324, "grad_norm": 1.689508318901062, "learning_rate": 4.917740600042645e-06, "loss": 1.4039, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 554 }, { "epoch": 0.3481806775407779, "grad_norm": 2.0137124061584473, "learning_rate": 4.91704293640742e-06, "loss": 1.2008, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 555 }, { "epoch": 0.34880803011292344, "grad_norm": 1.9370338916778564, "learning_rate": 4.9163423766273815e-06, "loss": 1.3269, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 556 }, { "epoch": 0.349435382685069, "grad_norm": 2.1233015060424805, "learning_rate": 4.915638921541952e-06, "loss": 1.4172, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 557 }, { "epoch": 0.35006273525721454, "grad_norm": 1.8814067840576172, "learning_rate": 4.914932571994024e-06, "loss": 1.2576, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 558 }, { "epoch": 0.3506900878293601, "grad_norm": 1.962165117263794, "learning_rate": 4.9142233288299595e-06, "loss": 1.2706, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 559 }, { "epoch": 0.35131744040150564, "grad_norm": 1.8706358671188354, "learning_rate": 4.913511192899586e-06, "loss": 1.3171, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 560 }, { "epoch": 0.35194479297365117, "grad_norm": 1.8167012929916382, "learning_rate": 4.9127961650561966e-06, "loss": 1.4102, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 561 }, { "epoch": 0.35257214554579674, "grad_norm": 2.4058690071105957, "learning_rate": 4.912078246156552e-06, "loss": 1.1793, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 562 }, { "epoch": 0.35319949811794227, "grad_norm": 1.899332880973816, "learning_rate": 4.911357437060875e-06, "loss": 1.2112, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 563 }, { "epoch": 0.35382685069008785, "grad_norm": 2.099212169647217, "learning_rate": 4.9106337386328524e-06, "loss": 1.3686, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 564 }, { "epoch": 0.35445420326223337, "grad_norm": 1.7897380590438843, "learning_rate": 4.909907151739634e-06, "loss": 1.3741, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 565 }, { "epoch": 0.35508155583437895, "grad_norm": 1.8345693349838257, "learning_rate": 4.909177677251828e-06, "loss": 1.3606, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 566 }, { "epoch": 0.35570890840652447, "grad_norm": 1.7460086345672607, "learning_rate": 4.908445316043503e-06, "loss": 1.3124, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 567 }, { "epoch": 0.35633626097867, "grad_norm": 1.9590387344360352, "learning_rate": 4.90771006899219e-06, "loss": 1.3128, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 568 }, { "epoch": 0.3569636135508156, "grad_norm": 1.4945135116577148, "learning_rate": 4.906971936978874e-06, "loss": 1.4596, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 569 }, { "epoch": 0.3575909661229611, "grad_norm": 1.649003267288208, "learning_rate": 4.906230920887998e-06, "loss": 1.2446, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 570 }, { "epoch": 0.3582183186951067, "grad_norm": 1.7148494720458984, "learning_rate": 4.905487021607462e-06, "loss": 1.2175, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 571 }, { "epoch": 0.3588456712672522, "grad_norm": 1.9987525939941406, "learning_rate": 4.904740240028618e-06, "loss": 1.3247, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 572 }, { "epoch": 0.3594730238393977, "grad_norm": 1.8224921226501465, "learning_rate": 4.903990577046275e-06, "loss": 1.455, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 573 }, { "epoch": 0.3601003764115433, "grad_norm": 1.7764469385147095, "learning_rate": 4.903238033558692e-06, "loss": 1.4298, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 574 }, { "epoch": 0.3607277289836888, "grad_norm": 1.8746356964111328, "learning_rate": 4.90248261046758e-06, "loss": 1.1855, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 575 }, { "epoch": 0.3613550815558344, "grad_norm": 1.7384833097457886, "learning_rate": 4.901724308678102e-06, "loss": 1.2095, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 576 }, { "epoch": 0.3619824341279799, "grad_norm": 1.754146695137024, "learning_rate": 4.900963129098868e-06, "loss": 1.2735, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 577 }, { "epoch": 0.36260978670012545, "grad_norm": 1.7974838018417358, "learning_rate": 4.900199072641937e-06, "loss": 1.2829, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 578 }, { "epoch": 0.363237139272271, "grad_norm": 1.8893485069274902, "learning_rate": 4.899432140222816e-06, "loss": 1.2911, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 579 }, { "epoch": 0.36386449184441655, "grad_norm": 1.7964454889297485, "learning_rate": 4.898662332760456e-06, "loss": 1.2089, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 580 }, { "epoch": 0.3644918444165621, "grad_norm": 1.9865061044692993, "learning_rate": 4.897889651177257e-06, "loss": 1.2704, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 581 }, { "epoch": 0.36511919698870765, "grad_norm": 1.8877671957015991, "learning_rate": 4.897114096399058e-06, "loss": 1.2918, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 582 }, { "epoch": 0.3657465495608532, "grad_norm": 1.8955698013305664, "learning_rate": 4.896335669355143e-06, "loss": 1.3619, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 583 }, { "epoch": 0.36637390213299875, "grad_norm": 1.6800644397735596, "learning_rate": 4.895554370978238e-06, "loss": 1.174, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 584 }, { "epoch": 0.3670012547051443, "grad_norm": 1.6480708122253418, "learning_rate": 4.894770202204509e-06, "loss": 1.1286, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 585 }, { "epoch": 0.36762860727728985, "grad_norm": 1.616126537322998, "learning_rate": 4.8939831639735594e-06, "loss": 1.2139, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 586 }, { "epoch": 0.3682559598494354, "grad_norm": 2.3213493824005127, "learning_rate": 4.893193257228436e-06, "loss": 1.4502, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 587 }, { "epoch": 0.36888331242158096, "grad_norm": 1.7523118257522583, "learning_rate": 4.892400482915617e-06, "loss": 1.3381, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 588 }, { "epoch": 0.3695106649937265, "grad_norm": 2.03842830657959, "learning_rate": 4.89160484198502e-06, "loss": 1.2769, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 589 }, { "epoch": 0.370138017565872, "grad_norm": 1.9613364934921265, "learning_rate": 4.890806335389996e-06, "loss": 1.1924, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 590 }, { "epoch": 0.3707653701380176, "grad_norm": 1.6123595237731934, "learning_rate": 4.89000496408733e-06, "loss": 1.2848, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 591 }, { "epoch": 0.3713927227101631, "grad_norm": 1.4787251949310303, "learning_rate": 4.889200729037241e-06, "loss": 1.3938, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 592 }, { "epoch": 0.3720200752823087, "grad_norm": 1.888031005859375, "learning_rate": 4.888393631203377e-06, "loss": 1.3367, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 593 }, { "epoch": 0.3726474278544542, "grad_norm": 1.948343276977539, "learning_rate": 4.887583671552815e-06, "loss": 1.3012, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 594 }, { "epoch": 0.37327478042659973, "grad_norm": 1.8316664695739746, "learning_rate": 4.886770851056068e-06, "loss": 1.2778, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 595 }, { "epoch": 0.3739021329987453, "grad_norm": 1.860657811164856, "learning_rate": 4.88595517068707e-06, "loss": 1.1374, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 596 }, { "epoch": 0.37452948557089083, "grad_norm": 1.8296133279800415, "learning_rate": 4.885136631423184e-06, "loss": 1.3081, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 597 }, { "epoch": 0.3751568381430364, "grad_norm": 1.8158042430877686, "learning_rate": 4.884315234245198e-06, "loss": 1.3434, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 598 }, { "epoch": 0.37578419071518193, "grad_norm": 1.9907175302505493, "learning_rate": 4.883490980137327e-06, "loss": 1.2084, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 599 }, { "epoch": 0.37641154328732745, "grad_norm": 2.1032094955444336, "learning_rate": 4.882663870087206e-06, "loss": 1.2768, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 600 }, { "epoch": 0.37703889585947303, "grad_norm": 1.874523401260376, "learning_rate": 4.881833905085892e-06, "loss": 1.2316, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 601 }, { "epoch": 0.37766624843161856, "grad_norm": 1.8574604988098145, "learning_rate": 4.881001086127866e-06, "loss": 1.3143, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 602 }, { "epoch": 0.37829360100376414, "grad_norm": 3.8513078689575195, "learning_rate": 4.880165414211027e-06, "loss": 1.2601, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 603 }, { "epoch": 0.37892095357590966, "grad_norm": 1.8717108964920044, "learning_rate": 4.8793268903366905e-06, "loss": 1.1758, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 604 }, { "epoch": 0.3795483061480552, "grad_norm": 1.7282662391662598, "learning_rate": 4.878485515509594e-06, "loss": 1.2174, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 605 }, { "epoch": 0.38017565872020076, "grad_norm": 2.066521167755127, "learning_rate": 4.8776412907378845e-06, "loss": 1.2972, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 606 }, { "epoch": 0.3808030112923463, "grad_norm": 1.7320374250411987, "learning_rate": 4.876794217033129e-06, "loss": 1.211, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 607 }, { "epoch": 0.38143036386449186, "grad_norm": 1.6623154878616333, "learning_rate": 4.875944295410308e-06, "loss": 1.2735, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 608 }, { "epoch": 0.3820577164366374, "grad_norm": 1.917044997215271, "learning_rate": 4.875091526887813e-06, "loss": 1.3399, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 609 }, { "epoch": 0.38268506900878296, "grad_norm": 2.031456470489502, "learning_rate": 4.874235912487444e-06, "loss": 1.1696, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 610 }, { "epoch": 0.3833124215809285, "grad_norm": 1.8612998723983765, "learning_rate": 4.8733774532344164e-06, "loss": 1.4226, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 611 }, { "epoch": 0.383939774153074, "grad_norm": 1.7743910551071167, "learning_rate": 4.872516150157351e-06, "loss": 1.2365, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 612 }, { "epoch": 0.3845671267252196, "grad_norm": 1.7323930263519287, "learning_rate": 4.871652004288275e-06, "loss": 1.1926, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 613 }, { "epoch": 0.3851944792973651, "grad_norm": 1.93533194065094, "learning_rate": 4.870785016662627e-06, "loss": 1.1668, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 614 }, { "epoch": 0.3858218318695107, "grad_norm": 3.5375280380249023, "learning_rate": 4.869915188319244e-06, "loss": 1.3051, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 615 }, { "epoch": 0.3864491844416562, "grad_norm": 2.405059814453125, "learning_rate": 4.8690425203003735e-06, "loss": 1.4259, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 616 }, { "epoch": 0.38707653701380174, "grad_norm": 1.943349003791809, "learning_rate": 4.86816701365166e-06, "loss": 1.202, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 617 }, { "epoch": 0.3877038895859473, "grad_norm": 1.9087837934494019, "learning_rate": 4.867288669422151e-06, "loss": 1.1793, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 618 }, { "epoch": 0.38833124215809284, "grad_norm": 1.8294497728347778, "learning_rate": 4.866407488664296e-06, "loss": 1.1885, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 619 }, { "epoch": 0.3889585947302384, "grad_norm": 1.836368441581726, "learning_rate": 4.865523472433942e-06, "loss": 1.2184, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 620 }, { "epoch": 0.38958594730238394, "grad_norm": 1.4587712287902832, "learning_rate": 4.8646366217903314e-06, "loss": 1.1466, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 621 }, { "epoch": 0.39021329987452946, "grad_norm": 2.0479862689971924, "learning_rate": 4.863746937796107e-06, "loss": 1.3139, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 622 }, { "epoch": 0.39084065244667504, "grad_norm": 1.8100470304489136, "learning_rate": 4.862854421517303e-06, "loss": 1.3105, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 623 }, { "epoch": 0.39146800501882056, "grad_norm": 1.9524677991867065, "learning_rate": 4.861959074023348e-06, "loss": 1.2959, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 624 }, { "epoch": 0.39209535759096614, "grad_norm": 1.7042853832244873, "learning_rate": 4.8610608963870665e-06, "loss": 1.3388, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 625 }, { "epoch": 0.39272271016311167, "grad_norm": 1.6724315881729126, "learning_rate": 4.860159889684668e-06, "loss": 1.22, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 626 }, { "epoch": 0.3933500627352572, "grad_norm": 1.8373382091522217, "learning_rate": 4.859256054995758e-06, "loss": 1.2115, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 627 }, { "epoch": 0.39397741530740277, "grad_norm": 1.9262843132019043, "learning_rate": 4.8583493934033245e-06, "loss": 1.3984, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 628 }, { "epoch": 0.3946047678795483, "grad_norm": 2.0423967838287354, "learning_rate": 4.857439905993748e-06, "loss": 1.3022, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 629 }, { "epoch": 0.39523212045169387, "grad_norm": 2.023704767227173, "learning_rate": 4.856527593856794e-06, "loss": 1.2688, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 630 }, { "epoch": 0.3958594730238394, "grad_norm": 2.0022521018981934, "learning_rate": 4.855612458085608e-06, "loss": 1.3507, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 631 }, { "epoch": 0.39648682559598497, "grad_norm": 2.0341274738311768, "learning_rate": 4.854694499776726e-06, "loss": 1.2695, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 632 }, { "epoch": 0.3971141781681305, "grad_norm": 1.9634671211242676, "learning_rate": 4.853773720030059e-06, "loss": 1.1983, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 633 }, { "epoch": 0.397741530740276, "grad_norm": 1.9861356019973755, "learning_rate": 4.8528501199489045e-06, "loss": 1.5236, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 634 }, { "epoch": 0.3983688833124216, "grad_norm": 1.6151918172836304, "learning_rate": 4.851923700639935e-06, "loss": 1.1524, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 635 }, { "epoch": 0.3989962358845671, "grad_norm": 1.7349764108657837, "learning_rate": 4.850994463213206e-06, "loss": 1.2273, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 636 }, { "epoch": 0.3996235884567127, "grad_norm": 2.104606866836548, "learning_rate": 4.850062408782144e-06, "loss": 1.2654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 637 }, { "epoch": 0.4002509410288582, "grad_norm": 1.8367067575454712, "learning_rate": 4.849127538463554e-06, "loss": 1.344, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 638 }, { "epoch": 0.40087829360100374, "grad_norm": 1.7443346977233887, "learning_rate": 4.848189853377615e-06, "loss": 1.2501, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 639 }, { "epoch": 0.4015056461731493, "grad_norm": 1.8433117866516113, "learning_rate": 4.8472493546478794e-06, "loss": 1.3459, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 640 }, { "epoch": 0.40213299874529485, "grad_norm": 1.8732486963272095, "learning_rate": 4.846306043401268e-06, "loss": 1.2625, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 641 }, { "epoch": 0.4027603513174404, "grad_norm": 1.844283103942871, "learning_rate": 4.845359920768074e-06, "loss": 1.2765, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 642 }, { "epoch": 0.40338770388958595, "grad_norm": 1.86635422706604, "learning_rate": 4.8444109878819585e-06, "loss": 1.4083, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 643 }, { "epoch": 0.40401505646173147, "grad_norm": 1.824976921081543, "learning_rate": 4.843459245879952e-06, "loss": 1.3423, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 644 }, { "epoch": 0.40464240903387705, "grad_norm": 1.6277782917022705, "learning_rate": 4.842504695902447e-06, "loss": 1.131, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 645 }, { "epoch": 0.40526976160602257, "grad_norm": 1.6990870237350464, "learning_rate": 4.841547339093203e-06, "loss": 1.2938, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 646 }, { "epoch": 0.40589711417816815, "grad_norm": 1.830122470855713, "learning_rate": 4.8405871765993435e-06, "loss": 1.239, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 647 }, { "epoch": 0.4065244667503137, "grad_norm": 1.927756905555725, "learning_rate": 4.839624209571352e-06, "loss": 1.3042, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 648 }, { "epoch": 0.4071518193224592, "grad_norm": 1.8679585456848145, "learning_rate": 4.838658439163072e-06, "loss": 1.3488, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 649 }, { "epoch": 0.4077791718946048, "grad_norm": 1.6993918418884277, "learning_rate": 4.837689866531709e-06, "loss": 1.4785, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 650 }, { "epoch": 0.4084065244667503, "grad_norm": 1.6975336074829102, "learning_rate": 4.836718492837823e-06, "loss": 1.2338, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 651 }, { "epoch": 0.4090338770388959, "grad_norm": 1.8254927396774292, "learning_rate": 4.835744319245333e-06, "loss": 1.3636, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 652 }, { "epoch": 0.4096612296110414, "grad_norm": 1.6572511196136475, "learning_rate": 4.83476734692151e-06, "loss": 1.1809, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 653 }, { "epoch": 0.410288582183187, "grad_norm": 1.943337082862854, "learning_rate": 4.833787577036981e-06, "loss": 1.2904, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 654 }, { "epoch": 0.4109159347553325, "grad_norm": 1.8604482412338257, "learning_rate": 4.832805010765724e-06, "loss": 1.3767, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 655 }, { "epoch": 0.411543287327478, "grad_norm": 1.9489023685455322, "learning_rate": 4.831819649285067e-06, "loss": 1.2878, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 656 }, { "epoch": 0.4121706398996236, "grad_norm": 1.7244187593460083, "learning_rate": 4.830831493775689e-06, "loss": 1.12, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 657 }, { "epoch": 0.4127979924717691, "grad_norm": 1.3886579275131226, "learning_rate": 4.829840545421615e-06, "loss": 1.2602, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 658 }, { "epoch": 0.4134253450439147, "grad_norm": 2.1388373374938965, "learning_rate": 4.828846805410219e-06, "loss": 1.4233, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 659 }, { "epoch": 0.41405269761606023, "grad_norm": 1.8400318622589111, "learning_rate": 4.827850274932216e-06, "loss": 1.2721, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 660 }, { "epoch": 0.41468005018820575, "grad_norm": 2.0351569652557373, "learning_rate": 4.826850955181669e-06, "loss": 1.2682, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 661 }, { "epoch": 0.41530740276035133, "grad_norm": 1.9729537963867188, "learning_rate": 4.8258488473559794e-06, "loss": 1.243, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 662 }, { "epoch": 0.41593475533249685, "grad_norm": 1.8061813116073608, "learning_rate": 4.8248439526558925e-06, "loss": 1.3179, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 663 }, { "epoch": 0.41656210790464243, "grad_norm": 1.7892892360687256, "learning_rate": 4.8238362722854905e-06, "loss": 1.3885, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 664 }, { "epoch": 0.41718946047678795, "grad_norm": 1.5382590293884277, "learning_rate": 4.822825807452195e-06, "loss": 1.2888, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 665 }, { "epoch": 0.4178168130489335, "grad_norm": 1.8710049390792847, "learning_rate": 4.821812559366763e-06, "loss": 1.3353, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 666 }, { "epoch": 0.41844416562107906, "grad_norm": 1.8414349555969238, "learning_rate": 4.820796529243287e-06, "loss": 1.3406, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 667 }, { "epoch": 0.4190715181932246, "grad_norm": 1.5640252828598022, "learning_rate": 4.819777718299192e-06, "loss": 1.2562, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 668 }, { "epoch": 0.41969887076537016, "grad_norm": 1.8525867462158203, "learning_rate": 4.8187561277552376e-06, "loss": 1.4028, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 669 }, { "epoch": 0.4203262233375157, "grad_norm": 1.936888337135315, "learning_rate": 4.81773175883551e-06, "loss": 1.141, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 670 }, { "epoch": 0.4209535759096612, "grad_norm": 1.4375437498092651, "learning_rate": 4.816704612767427e-06, "loss": 1.1689, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 671 }, { "epoch": 0.4215809284818068, "grad_norm": 1.9113013744354248, "learning_rate": 4.815674690781735e-06, "loss": 1.3321, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 672 }, { "epoch": 0.4222082810539523, "grad_norm": 1.830393671989441, "learning_rate": 4.814641994112505e-06, "loss": 1.4788, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 673 }, { "epoch": 0.4228356336260979, "grad_norm": 1.8348015546798706, "learning_rate": 4.813606523997132e-06, "loss": 1.3408, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 674 }, { "epoch": 0.4234629861982434, "grad_norm": 1.7932466268539429, "learning_rate": 4.812568281676335e-06, "loss": 1.2546, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 675 }, { "epoch": 0.424090338770389, "grad_norm": 1.4640141725540161, "learning_rate": 4.811527268394157e-06, "loss": 1.234, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 676 }, { "epoch": 0.4247176913425345, "grad_norm": 1.623489499092102, "learning_rate": 4.8104834853979565e-06, "loss": 1.3964, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 677 }, { "epoch": 0.42534504391468003, "grad_norm": 1.85269033908844, "learning_rate": 4.809436933938415e-06, "loss": 1.1884, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 678 }, { "epoch": 0.4259723964868256, "grad_norm": 1.5965299606323242, "learning_rate": 4.8083876152695285e-06, "loss": 1.2753, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 679 }, { "epoch": 0.42659974905897113, "grad_norm": 1.5630314350128174, "learning_rate": 4.80733553064861e-06, "loss": 1.3914, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 680 }, { "epoch": 0.4272271016311167, "grad_norm": 1.7506418228149414, "learning_rate": 4.806280681336286e-06, "loss": 1.4983, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 681 }, { "epoch": 0.42785445420326224, "grad_norm": 1.6316046714782715, "learning_rate": 4.805223068596497e-06, "loss": 1.3471, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 682 }, { "epoch": 0.42848180677540776, "grad_norm": 1.502759337425232, "learning_rate": 4.804162693696494e-06, "loss": 1.3321, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 683 }, { "epoch": 0.42910915934755334, "grad_norm": 1.963027000427246, "learning_rate": 4.803099557906836e-06, "loss": 1.2162, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 684 }, { "epoch": 0.42973651191969886, "grad_norm": 1.7412725687026978, "learning_rate": 4.802033662501392e-06, "loss": 1.2524, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 685 }, { "epoch": 0.43036386449184444, "grad_norm": 2.261122226715088, "learning_rate": 4.800965008757339e-06, "loss": 1.3432, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 686 }, { "epoch": 0.43099121706398996, "grad_norm": 1.758253812789917, "learning_rate": 4.799893597955157e-06, "loss": 1.2964, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 687 }, { "epoch": 0.4316185696361355, "grad_norm": 1.7574810981750488, "learning_rate": 4.7988194313786275e-06, "loss": 1.3334, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 688 }, { "epoch": 0.43224592220828106, "grad_norm": 1.8565386533737183, "learning_rate": 4.797742510314838e-06, "loss": 1.3402, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 689 }, { "epoch": 0.4328732747804266, "grad_norm": 1.7075424194335938, "learning_rate": 4.796662836054176e-06, "loss": 1.2549, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 690 }, { "epoch": 0.43350062735257217, "grad_norm": 1.8174091577529907, "learning_rate": 4.795580409890325e-06, "loss": 1.21, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 691 }, { "epoch": 0.4341279799247177, "grad_norm": 2.091965675354004, "learning_rate": 4.794495233120268e-06, "loss": 1.3602, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 692 }, { "epoch": 0.4347553324968632, "grad_norm": 1.618514060974121, "learning_rate": 4.793407307044285e-06, "loss": 1.3823, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 693 }, { "epoch": 0.4353826850690088, "grad_norm": 1.4373396635055542, "learning_rate": 4.792316632965947e-06, "loss": 1.1906, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 694 }, { "epoch": 0.4360100376411543, "grad_norm": 1.89703369140625, "learning_rate": 4.791223212192121e-06, "loss": 1.2905, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 695 }, { "epoch": 0.4366373902132999, "grad_norm": 1.8535451889038086, "learning_rate": 4.790127046032963e-06, "loss": 1.2045, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 696 }, { "epoch": 0.4372647427854454, "grad_norm": 1.8034179210662842, "learning_rate": 4.789028135801919e-06, "loss": 1.2953, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 697 }, { "epoch": 0.437892095357591, "grad_norm": 1.4916350841522217, "learning_rate": 4.787926482815724e-06, "loss": 1.2158, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 698 }, { "epoch": 0.4385194479297365, "grad_norm": 1.2732455730438232, "learning_rate": 4.786822088394397e-06, "loss": 1.405, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 699 }, { "epoch": 0.43914680050188204, "grad_norm": 1.8891061544418335, "learning_rate": 4.7857149538612454e-06, "loss": 1.2653, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 700 }, { "epoch": 0.4397741530740276, "grad_norm": 2.2417843341827393, "learning_rate": 4.784605080542857e-06, "loss": 1.3876, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 701 }, { "epoch": 0.44040150564617314, "grad_norm": 1.9334285259246826, "learning_rate": 4.783492469769103e-06, "loss": 1.3257, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 702 }, { "epoch": 0.4410288582183187, "grad_norm": 1.7055387496948242, "learning_rate": 4.7823771228731325e-06, "loss": 1.2261, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 703 }, { "epoch": 0.44165621079046424, "grad_norm": 1.9559547901153564, "learning_rate": 4.7812590411913755e-06, "loss": 1.2834, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 704 }, { "epoch": 0.44228356336260977, "grad_norm": 1.906968116760254, "learning_rate": 4.780138226063537e-06, "loss": 1.3762, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 705 }, { "epoch": 0.44291091593475534, "grad_norm": 2.4029104709625244, "learning_rate": 4.779014678832599e-06, "loss": 1.3537, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 706 }, { "epoch": 0.44353826850690087, "grad_norm": 1.7167798280715942, "learning_rate": 4.777888400844815e-06, "loss": 1.219, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 707 }, { "epoch": 0.44416562107904645, "grad_norm": 1.8012571334838867, "learning_rate": 4.7767593934497145e-06, "loss": 1.3654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 708 }, { "epoch": 0.44479297365119197, "grad_norm": 2.2008590698242188, "learning_rate": 4.775627658000091e-06, "loss": 1.2487, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 709 }, { "epoch": 0.4454203262233375, "grad_norm": 1.9626473188400269, "learning_rate": 4.774493195852012e-06, "loss": 1.2511, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 710 }, { "epoch": 0.44604767879548307, "grad_norm": 2.1587445735931396, "learning_rate": 4.773356008364812e-06, "loss": 1.2596, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 711 }, { "epoch": 0.4466750313676286, "grad_norm": 1.9012348651885986, "learning_rate": 4.772216096901089e-06, "loss": 1.315, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 712 }, { "epoch": 0.4473023839397742, "grad_norm": 1.6769298315048218, "learning_rate": 4.771073462826704e-06, "loss": 1.1517, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 713 }, { "epoch": 0.4479297365119197, "grad_norm": 1.7850003242492676, "learning_rate": 4.769928107510784e-06, "loss": 1.319, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 714 }, { "epoch": 0.4485570890840652, "grad_norm": 1.8586574792861938, "learning_rate": 4.768780032325714e-06, "loss": 1.3514, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 715 }, { "epoch": 0.4491844416562108, "grad_norm": 1.946988821029663, "learning_rate": 4.767629238647138e-06, "loss": 1.2864, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 716 }, { "epoch": 0.4498117942283563, "grad_norm": 1.9582651853561401, "learning_rate": 4.766475727853959e-06, "loss": 1.2603, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 717 }, { "epoch": 0.4504391468005019, "grad_norm": 1.9077582359313965, "learning_rate": 4.765319501328332e-06, "loss": 1.3582, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 718 }, { "epoch": 0.4510664993726474, "grad_norm": 1.8661820888519287, "learning_rate": 4.7641605604556725e-06, "loss": 1.2798, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 719 }, { "epoch": 0.451693851944793, "grad_norm": 1.7664010524749756, "learning_rate": 4.762998906624643e-06, "loss": 1.2414, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 720 }, { "epoch": 0.4523212045169385, "grad_norm": 1.740552544593811, "learning_rate": 4.761834541227157e-06, "loss": 1.3123, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 721 }, { "epoch": 0.45294855708908405, "grad_norm": 2.0045013427734375, "learning_rate": 4.760667465658378e-06, "loss": 1.3067, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 722 }, { "epoch": 0.4535759096612296, "grad_norm": 1.4887140989303589, "learning_rate": 4.75949768131672e-06, "loss": 1.2057, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 723 }, { "epoch": 0.45420326223337515, "grad_norm": 4.028663635253906, "learning_rate": 4.758325189603838e-06, "loss": 1.2178, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 724 }, { "epoch": 0.4548306148055207, "grad_norm": 1.8994674682617188, "learning_rate": 4.757149991924633e-06, "loss": 1.2722, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 725 }, { "epoch": 0.45545796737766625, "grad_norm": 1.8742094039916992, "learning_rate": 4.755972089687248e-06, "loss": 1.2098, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 726 }, { "epoch": 0.4560853199498118, "grad_norm": 1.8946446180343628, "learning_rate": 4.754791484303068e-06, "loss": 1.2502, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 727 }, { "epoch": 0.45671267252195735, "grad_norm": 1.503798246383667, "learning_rate": 4.753608177186714e-06, "loss": 1.2383, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 728 }, { "epoch": 0.4573400250941029, "grad_norm": 1.4462285041809082, "learning_rate": 4.752422169756048e-06, "loss": 1.2647, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 729 }, { "epoch": 0.45796737766624845, "grad_norm": 1.9991223812103271, "learning_rate": 4.751233463432165e-06, "loss": 1.2988, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 730 }, { "epoch": 0.458594730238394, "grad_norm": 1.5803600549697876, "learning_rate": 4.7500420596393935e-06, "loss": 1.2609, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 731 }, { "epoch": 0.4592220828105395, "grad_norm": 1.5404584407806396, "learning_rate": 4.748847959805297e-06, "loss": 1.2166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 732 }, { "epoch": 0.4598494353826851, "grad_norm": 1.8132827281951904, "learning_rate": 4.7476511653606676e-06, "loss": 1.3939, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 733 }, { "epoch": 0.4604767879548306, "grad_norm": 1.800467610359192, "learning_rate": 4.7464516777395234e-06, "loss": 1.2325, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 734 }, { "epoch": 0.4611041405269762, "grad_norm": 1.9034929275512695, "learning_rate": 4.7452494983791155e-06, "loss": 1.2562, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 735 }, { "epoch": 0.4617314930991217, "grad_norm": 1.7843937873840332, "learning_rate": 4.744044628719916e-06, "loss": 1.3663, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 736 }, { "epoch": 0.4623588456712672, "grad_norm": 2.562595844268799, "learning_rate": 4.74283707020562e-06, "loss": 1.2311, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 737 }, { "epoch": 0.4629861982434128, "grad_norm": 1.9093017578125, "learning_rate": 4.741626824283149e-06, "loss": 1.3225, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 738 }, { "epoch": 0.46361355081555833, "grad_norm": 1.5254753828048706, "learning_rate": 4.740413892402639e-06, "loss": 1.161, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 739 }, { "epoch": 0.4642409033877039, "grad_norm": 1.795628547668457, "learning_rate": 4.739198276017449e-06, "loss": 1.2754, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 740 }, { "epoch": 0.46486825595984943, "grad_norm": 1.8925135135650635, "learning_rate": 4.7379799765841525e-06, "loss": 1.208, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 741 }, { "epoch": 0.465495608531995, "grad_norm": 1.8105072975158691, "learning_rate": 4.736758995562536e-06, "loss": 1.3267, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 742 }, { "epoch": 0.46612296110414053, "grad_norm": 1.910277009010315, "learning_rate": 4.735535334415604e-06, "loss": 1.1361, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 743 }, { "epoch": 0.46675031367628605, "grad_norm": 1.7848541736602783, "learning_rate": 4.734308994609568e-06, "loss": 1.405, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 744 }, { "epoch": 0.46737766624843163, "grad_norm": 1.724311351776123, "learning_rate": 4.73307997761385e-06, "loss": 1.2611, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 745 }, { "epoch": 0.46800501882057716, "grad_norm": 1.8376044034957886, "learning_rate": 4.731848284901082e-06, "loss": 1.4162, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 746 }, { "epoch": 0.46863237139272274, "grad_norm": 1.8465354442596436, "learning_rate": 4.7306139179471e-06, "loss": 1.3957, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 747 }, { "epoch": 0.46925972396486826, "grad_norm": 1.9396543502807617, "learning_rate": 4.7293768782309446e-06, "loss": 1.2662, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 748 }, { "epoch": 0.4698870765370138, "grad_norm": 1.952086091041565, "learning_rate": 4.72813716723486e-06, "loss": 1.2586, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 749 }, { "epoch": 0.47051442910915936, "grad_norm": 1.8261383771896362, "learning_rate": 4.726894786444289e-06, "loss": 1.2385, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 750 }, { "epoch": 0.4711417816813049, "grad_norm": 1.561482548713684, "learning_rate": 4.725649737347877e-06, "loss": 1.1562, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 751 }, { "epoch": 0.47176913425345046, "grad_norm": 1.9331833124160767, "learning_rate": 4.724402021437463e-06, "loss": 1.1425, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 752 }, { "epoch": 0.472396486825596, "grad_norm": 1.876682162284851, "learning_rate": 4.723151640208084e-06, "loss": 1.327, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 753 }, { "epoch": 0.4730238393977415, "grad_norm": 1.957084059715271, "learning_rate": 4.721898595157969e-06, "loss": 1.3078, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 754 }, { "epoch": 0.4736511919698871, "grad_norm": 1.9063243865966797, "learning_rate": 4.720642887788538e-06, "loss": 1.4485, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 755 }, { "epoch": 0.4742785445420326, "grad_norm": 1.6841647624969482, "learning_rate": 4.719384519604404e-06, "loss": 1.2789, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 756 }, { "epoch": 0.4749058971141782, "grad_norm": 1.864952802658081, "learning_rate": 4.718123492113367e-06, "loss": 1.1969, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 757 }, { "epoch": 0.4755332496863237, "grad_norm": 1.3406996726989746, "learning_rate": 4.716859806826412e-06, "loss": 1.2043, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 758 }, { "epoch": 0.47616060225846923, "grad_norm": 2.2283079624176025, "learning_rate": 4.7155934652577095e-06, "loss": 1.3071, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 759 }, { "epoch": 0.4767879548306148, "grad_norm": 1.748877763748169, "learning_rate": 4.714324468924614e-06, "loss": 1.3739, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 760 }, { "epoch": 0.47741530740276034, "grad_norm": 1.9405159950256348, "learning_rate": 4.71305281934766e-06, "loss": 1.1123, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 761 }, { "epoch": 0.4780426599749059, "grad_norm": 1.994694709777832, "learning_rate": 4.71177851805056e-06, "loss": 1.2621, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 762 }, { "epoch": 0.47867001254705144, "grad_norm": 1.9419386386871338, "learning_rate": 4.7105015665602064e-06, "loss": 1.2656, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 763 }, { "epoch": 0.479297365119197, "grad_norm": 1.8195141553878784, "learning_rate": 4.709221966406664e-06, "loss": 1.1962, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 764 }, { "epoch": 0.47992471769134254, "grad_norm": 1.7010425329208374, "learning_rate": 4.707939719123175e-06, "loss": 1.3749, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 765 }, { "epoch": 0.48055207026348806, "grad_norm": 1.808499813079834, "learning_rate": 4.70665482624615e-06, "loss": 1.1261, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 766 }, { "epoch": 0.48117942283563364, "grad_norm": 1.8877878189086914, "learning_rate": 4.705367289315172e-06, "loss": 1.358, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 767 }, { "epoch": 0.48180677540777916, "grad_norm": 2.219843626022339, "learning_rate": 4.704077109872989e-06, "loss": 1.3687, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 768 }, { "epoch": 0.48243412797992474, "grad_norm": 1.7579948902130127, "learning_rate": 4.702784289465521e-06, "loss": 1.2922, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 769 }, { "epoch": 0.48306148055207027, "grad_norm": 1.767750859260559, "learning_rate": 4.701488829641845e-06, "loss": 1.2044, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 770 }, { "epoch": 0.4836888331242158, "grad_norm": 1.6337237358093262, "learning_rate": 4.700190731954209e-06, "loss": 1.4174, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 771 }, { "epoch": 0.48431618569636137, "grad_norm": 1.8537286520004272, "learning_rate": 4.698889997958013e-06, "loss": 1.4132, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 772 }, { "epoch": 0.4849435382685069, "grad_norm": 1.842587947845459, "learning_rate": 4.697586629211822e-06, "loss": 1.2961, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 773 }, { "epoch": 0.48557089084065247, "grad_norm": 1.8020635843276978, "learning_rate": 4.696280627277356e-06, "loss": 1.2704, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 774 }, { "epoch": 0.486198243412798, "grad_norm": 2.015141248703003, "learning_rate": 4.694971993719491e-06, "loss": 1.276, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 775 }, { "epoch": 0.4868255959849435, "grad_norm": 1.6180552244186401, "learning_rate": 4.6936607301062545e-06, "loss": 1.2509, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 776 }, { "epoch": 0.4874529485570891, "grad_norm": 2.057633876800537, "learning_rate": 4.692346838008828e-06, "loss": 1.2712, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 777 }, { "epoch": 0.4880803011292346, "grad_norm": 1.8549116849899292, "learning_rate": 4.691030319001538e-06, "loss": 1.2617, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 778 }, { "epoch": 0.4887076537013802, "grad_norm": 1.8069357872009277, "learning_rate": 4.689711174661864e-06, "loss": 1.1867, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 779 }, { "epoch": 0.4893350062735257, "grad_norm": 2.0871284008026123, "learning_rate": 4.688389406570427e-06, "loss": 1.2402, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 780 }, { "epoch": 0.48996235884567124, "grad_norm": 1.4803322553634644, "learning_rate": 4.687065016310996e-06, "loss": 1.2458, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 781 }, { "epoch": 0.4905897114178168, "grad_norm": 1.9475903511047363, "learning_rate": 4.685738005470478e-06, "loss": 1.3481, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 782 }, { "epoch": 0.49121706398996234, "grad_norm": 1.8729878664016724, "learning_rate": 4.684408375638921e-06, "loss": 1.3966, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 783 }, { "epoch": 0.4918444165621079, "grad_norm": 1.7364298105239868, "learning_rate": 4.683076128409512e-06, "loss": 1.2473, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 784 }, { "epoch": 0.49247176913425345, "grad_norm": 1.6857995986938477, "learning_rate": 4.681741265378574e-06, "loss": 1.3229, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 785 }, { "epoch": 0.493099121706399, "grad_norm": 2.2026307582855225, "learning_rate": 4.680403788145566e-06, "loss": 1.298, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 786 }, { "epoch": 0.49372647427854455, "grad_norm": 2.3991730213165283, "learning_rate": 4.679063698313074e-06, "loss": 1.2762, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 787 }, { "epoch": 0.49435382685069007, "grad_norm": 1.9600284099578857, "learning_rate": 4.6777209974868194e-06, "loss": 1.3338, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 788 }, { "epoch": 0.49498117942283565, "grad_norm": 1.7276560068130493, "learning_rate": 4.676375687275653e-06, "loss": 1.2017, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 789 }, { "epoch": 0.49560853199498117, "grad_norm": 2.1582796573638916, "learning_rate": 4.675027769291547e-06, "loss": 1.2509, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 790 }, { "epoch": 0.49623588456712675, "grad_norm": 1.7889536619186401, "learning_rate": 4.673677245149602e-06, "loss": 1.3825, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 791 }, { "epoch": 0.4968632371392723, "grad_norm": 1.567894697189331, "learning_rate": 4.672324116468042e-06, "loss": 1.2841, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 792 }, { "epoch": 0.4974905897114178, "grad_norm": 1.4516322612762451, "learning_rate": 4.67096838486821e-06, "loss": 1.2557, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 793 }, { "epoch": 0.4981179422835634, "grad_norm": 1.992506980895996, "learning_rate": 4.669610051974566e-06, "loss": 1.2688, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 794 }, { "epoch": 0.4987452948557089, "grad_norm": 1.7153563499450684, "learning_rate": 4.668249119414692e-06, "loss": 1.4142, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 795 }, { "epoch": 0.4993726474278545, "grad_norm": 1.7995789051055908, "learning_rate": 4.66688558881928e-06, "loss": 1.2004, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 796 }, { "epoch": 0.5, "grad_norm": 1.726151943206787, "learning_rate": 4.6655194618221385e-06, "loss": 1.2882, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 797 }, { "epoch": 0.5006273525721455, "grad_norm": 1.8986132144927979, "learning_rate": 4.664150740060186e-06, "loss": 1.4326, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 798 }, { "epoch": 0.501254705144291, "grad_norm": 1.6591218709945679, "learning_rate": 4.6627794251734485e-06, "loss": 1.251, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 799 }, { "epoch": 0.5018820577164367, "grad_norm": 1.7284833192825317, "learning_rate": 4.6614055188050615e-06, "loss": 1.3658, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 800 }, { "epoch": 0.5025094102885822, "grad_norm": 1.6566392183303833, "learning_rate": 4.660029022601264e-06, "loss": 1.2381, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 801 }, { "epoch": 0.5031367628607277, "grad_norm": 2.047153949737549, "learning_rate": 4.6586499382113985e-06, "loss": 1.5228, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 802 }, { "epoch": 0.5037641154328732, "grad_norm": 1.376027226448059, "learning_rate": 4.6572682672879095e-06, "loss": 1.1302, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 803 }, { "epoch": 0.5043914680050188, "grad_norm": 1.8712245225906372, "learning_rate": 4.655884011486341e-06, "loss": 1.3529, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 804 }, { "epoch": 0.5050188205771644, "grad_norm": 1.777405858039856, "learning_rate": 4.654497172465333e-06, "loss": 1.2575, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 805 }, { "epoch": 0.5056461731493099, "grad_norm": 1.7459986209869385, "learning_rate": 4.653107751886621e-06, "loss": 1.1989, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 806 }, { "epoch": 0.5062735257214555, "grad_norm": 1.6451423168182373, "learning_rate": 4.651715751415033e-06, "loss": 1.2338, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 807 }, { "epoch": 0.506900878293601, "grad_norm": 1.2586438655853271, "learning_rate": 4.65032117271849e-06, "loss": 1.2002, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 808 }, { "epoch": 0.5075282308657465, "grad_norm": 1.8858215808868408, "learning_rate": 4.648924017468003e-06, "loss": 1.1834, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 809 }, { "epoch": 0.5081555834378921, "grad_norm": 2.0493290424346924, "learning_rate": 4.647524287337668e-06, "loss": 1.3408, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 810 }, { "epoch": 0.5087829360100377, "grad_norm": 1.9480719566345215, "learning_rate": 4.646121984004666e-06, "loss": 1.2574, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 811 }, { "epoch": 0.5094102885821832, "grad_norm": 1.9408248662948608, "learning_rate": 4.644717109149262e-06, "loss": 1.3718, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 812 }, { "epoch": 0.5100376411543287, "grad_norm": 1.7562613487243652, "learning_rate": 4.643309664454806e-06, "loss": 1.3656, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 813 }, { "epoch": 0.5106649937264742, "grad_norm": 1.7359344959259033, "learning_rate": 4.641899651607721e-06, "loss": 1.3346, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 814 }, { "epoch": 0.5112923462986199, "grad_norm": 1.976855993270874, "learning_rate": 4.640487072297509e-06, "loss": 1.2468, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 815 }, { "epoch": 0.5119196988707654, "grad_norm": 1.5342873334884644, "learning_rate": 4.6390719282167515e-06, "loss": 1.1683, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 816 }, { "epoch": 0.5125470514429109, "grad_norm": 1.69839346408844, "learning_rate": 4.637654221061098e-06, "loss": 1.2034, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 817 }, { "epoch": 0.5131744040150564, "grad_norm": 2.431257486343384, "learning_rate": 4.636233952529271e-06, "loss": 1.3565, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 818 }, { "epoch": 0.513801756587202, "grad_norm": 1.39933180809021, "learning_rate": 4.634811124323062e-06, "loss": 1.3246, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 819 }, { "epoch": 0.5144291091593476, "grad_norm": 2.4654202461242676, "learning_rate": 4.6333857381473305e-06, "loss": 1.3884, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 820 }, { "epoch": 0.5150564617314931, "grad_norm": 1.9540578126907349, "learning_rate": 4.631957795709999e-06, "loss": 1.3104, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 821 }, { "epoch": 0.5156838143036386, "grad_norm": 1.9116793870925903, "learning_rate": 4.630527298722056e-06, "loss": 1.2827, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 822 }, { "epoch": 0.5163111668757842, "grad_norm": 2.0660154819488525, "learning_rate": 4.629094248897546e-06, "loss": 1.3751, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 823 }, { "epoch": 0.5169385194479298, "grad_norm": 1.8745118379592896, "learning_rate": 4.627658647953579e-06, "loss": 1.3202, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 824 }, { "epoch": 0.5175658720200753, "grad_norm": 1.8826225996017456, "learning_rate": 4.626220497610315e-06, "loss": 1.2711, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 825 }, { "epoch": 0.5181932245922208, "grad_norm": 2.0264394283294678, "learning_rate": 4.6247797995909735e-06, "loss": 1.2534, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 826 }, { "epoch": 0.5188205771643664, "grad_norm": 2.2216804027557373, "learning_rate": 4.623336555621825e-06, "loss": 1.3163, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 827 }, { "epoch": 0.5194479297365119, "grad_norm": 1.9490082263946533, "learning_rate": 4.621890767432189e-06, "loss": 1.3073, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 828 }, { "epoch": 0.5200752823086575, "grad_norm": 2.479620933532715, "learning_rate": 4.620442436754438e-06, "loss": 1.3627, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 829 }, { "epoch": 0.520702634880803, "grad_norm": 1.924384593963623, "learning_rate": 4.618991565323987e-06, "loss": 1.3137, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 830 }, { "epoch": 0.5213299874529486, "grad_norm": 1.8495651483535767, "learning_rate": 4.617538154879295e-06, "loss": 1.2852, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 831 }, { "epoch": 0.5219573400250941, "grad_norm": 1.8771755695343018, "learning_rate": 4.616082207161867e-06, "loss": 1.242, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 832 }, { "epoch": 0.5225846925972396, "grad_norm": 1.8403741121292114, "learning_rate": 4.614623723916245e-06, "loss": 1.2524, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 833 }, { "epoch": 0.5232120451693852, "grad_norm": 1.9191933870315552, "learning_rate": 4.613162706890011e-06, "loss": 1.4476, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 834 }, { "epoch": 0.5238393977415308, "grad_norm": 2.0666935443878174, "learning_rate": 4.611699157833781e-06, "loss": 1.3978, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 835 }, { "epoch": 0.5244667503136763, "grad_norm": 1.9643383026123047, "learning_rate": 4.610233078501207e-06, "loss": 1.1818, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 836 }, { "epoch": 0.5250941028858218, "grad_norm": 2.003880262374878, "learning_rate": 4.608764470648971e-06, "loss": 1.2073, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 837 }, { "epoch": 0.5257214554579673, "grad_norm": 1.6099451780319214, "learning_rate": 4.607293336036787e-06, "loss": 1.2718, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 838 }, { "epoch": 0.526348808030113, "grad_norm": 1.862188458442688, "learning_rate": 4.605819676427393e-06, "loss": 1.2728, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 839 }, { "epoch": 0.5269761606022585, "grad_norm": 2.5151021480560303, "learning_rate": 4.604343493586558e-06, "loss": 1.2652, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 840 }, { "epoch": 0.527603513174404, "grad_norm": 1.627556562423706, "learning_rate": 4.602864789283068e-06, "loss": 1.2863, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 841 }, { "epoch": 0.5282308657465495, "grad_norm": 1.7955687046051025, "learning_rate": 4.601383565288735e-06, "loss": 1.298, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 842 }, { "epoch": 0.5288582183186951, "grad_norm": 1.9044831991195679, "learning_rate": 4.599899823378389e-06, "loss": 1.2663, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 843 }, { "epoch": 0.5294855708908407, "grad_norm": 1.531988501548767, "learning_rate": 4.598413565329876e-06, "loss": 1.2011, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 844 }, { "epoch": 0.5301129234629862, "grad_norm": 2.0963053703308105, "learning_rate": 4.5969247929240555e-06, "loss": 1.4239, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 845 }, { "epoch": 0.5307402760351317, "grad_norm": 1.9998327493667603, "learning_rate": 4.595433507944805e-06, "loss": 1.3493, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 846 }, { "epoch": 0.5313676286072773, "grad_norm": 1.8099595308303833, "learning_rate": 4.593939712179007e-06, "loss": 1.3457, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 847 }, { "epoch": 0.5319949811794228, "grad_norm": 1.8672784566879272, "learning_rate": 4.5924434074165554e-06, "loss": 1.2896, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 848 }, { "epoch": 0.5326223337515684, "grad_norm": 1.7104904651641846, "learning_rate": 4.590944595450351e-06, "loss": 1.1611, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 849 }, { "epoch": 0.533249686323714, "grad_norm": 2.010240077972412, "learning_rate": 4.589443278076296e-06, "loss": 1.1827, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 850 }, { "epoch": 0.5338770388958595, "grad_norm": 2.0523414611816406, "learning_rate": 4.587939457093296e-06, "loss": 1.2599, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 851 }, { "epoch": 0.534504391468005, "grad_norm": 1.7473045587539673, "learning_rate": 4.586433134303257e-06, "loss": 1.1739, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 852 }, { "epoch": 0.5351317440401505, "grad_norm": 2.0589144229888916, "learning_rate": 4.584924311511083e-06, "loss": 1.304, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 853 }, { "epoch": 0.5357590966122961, "grad_norm": 1.6641743183135986, "learning_rate": 4.5834129905246725e-06, "loss": 1.3376, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 854 }, { "epoch": 0.5363864491844417, "grad_norm": 1.9077972173690796, "learning_rate": 4.5818991731549185e-06, "loss": 1.332, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 855 }, { "epoch": 0.5370138017565872, "grad_norm": 1.9205858707427979, "learning_rate": 4.5803828612157035e-06, "loss": 1.1763, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 856 }, { "epoch": 0.5376411543287327, "grad_norm": 1.5044771432876587, "learning_rate": 4.5788640565239e-06, "loss": 1.1735, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 857 }, { "epoch": 0.5382685069008782, "grad_norm": 1.7473088502883911, "learning_rate": 4.577342760899368e-06, "loss": 1.4267, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 858 }, { "epoch": 0.5388958594730239, "grad_norm": 1.798748254776001, "learning_rate": 4.575818976164952e-06, "loss": 1.3393, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 859 }, { "epoch": 0.5395232120451694, "grad_norm": 1.7975914478302002, "learning_rate": 4.574292704146478e-06, "loss": 1.1311, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 860 }, { "epoch": 0.5401505646173149, "grad_norm": 1.6357945203781128, "learning_rate": 4.5727639466727525e-06, "loss": 1.3367, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 861 }, { "epoch": 0.5407779171894604, "grad_norm": 1.763322353363037, "learning_rate": 4.5712327055755605e-06, "loss": 1.2515, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 862 }, { "epoch": 0.541405269761606, "grad_norm": 1.855243444442749, "learning_rate": 4.569698982689664e-06, "loss": 1.1587, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 863 }, { "epoch": 0.5420326223337516, "grad_norm": 1.829127550125122, "learning_rate": 4.5681627798527965e-06, "loss": 1.4049, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 864 }, { "epoch": 0.5426599749058971, "grad_norm": 1.9672813415527344, "learning_rate": 4.566624098905665e-06, "loss": 1.4499, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 865 }, { "epoch": 0.5432873274780426, "grad_norm": 1.776489496231079, "learning_rate": 4.565082941691944e-06, "loss": 1.2612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 866 }, { "epoch": 0.5439146800501882, "grad_norm": 1.8452541828155518, "learning_rate": 4.5635393100582785e-06, "loss": 1.3581, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 867 }, { "epoch": 0.5445420326223338, "grad_norm": 2.2932517528533936, "learning_rate": 4.561993205854273e-06, "loss": 1.317, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 868 }, { "epoch": 0.5451693851944793, "grad_norm": 1.9137377738952637, "learning_rate": 4.560444630932499e-06, "loss": 1.2381, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 869 }, { "epoch": 0.5457967377666249, "grad_norm": 1.4997367858886719, "learning_rate": 4.558893587148488e-06, "loss": 1.2208, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 870 }, { "epoch": 0.5464240903387704, "grad_norm": 1.9404653310775757, "learning_rate": 4.5573400763607264e-06, "loss": 1.4, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 871 }, { "epoch": 0.5470514429109159, "grad_norm": 1.885493278503418, "learning_rate": 4.555784100430662e-06, "loss": 1.1995, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 872 }, { "epoch": 0.5476787954830615, "grad_norm": 1.9907666444778442, "learning_rate": 4.554225661222692e-06, "loss": 1.2612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 873 }, { "epoch": 0.548306148055207, "grad_norm": 1.9186691045761108, "learning_rate": 4.552664760604167e-06, "loss": 1.3292, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 874 }, { "epoch": 0.5489335006273526, "grad_norm": 1.8157944679260254, "learning_rate": 4.5511014004453855e-06, "loss": 1.3547, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 875 }, { "epoch": 0.5495608531994981, "grad_norm": 1.961639165878296, "learning_rate": 4.549535582619595e-06, "loss": 1.2362, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 876 }, { "epoch": 0.5501882057716436, "grad_norm": 1.9759812355041504, "learning_rate": 4.547967309002988e-06, "loss": 1.2151, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 877 }, { "epoch": 0.5508155583437893, "grad_norm": 1.2574729919433594, "learning_rate": 4.5463965814746955e-06, "loss": 1.1432, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 878 }, { "epoch": 0.5514429109159348, "grad_norm": 2.019104242324829, "learning_rate": 4.544823401916794e-06, "loss": 1.2727, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 879 }, { "epoch": 0.5520702634880803, "grad_norm": 1.6454278230667114, "learning_rate": 4.543247772214297e-06, "loss": 1.2706, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 880 }, { "epoch": 0.5526976160602258, "grad_norm": 1.697099208831787, "learning_rate": 4.541669694255149e-06, "loss": 1.2914, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 881 }, { "epoch": 0.5533249686323714, "grad_norm": 1.8443635702133179, "learning_rate": 4.540089169930234e-06, "loss": 1.3881, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 882 }, { "epoch": 0.553952321204517, "grad_norm": 2.3314108848571777, "learning_rate": 4.538506201133366e-06, "loss": 1.5001, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 883 }, { "epoch": 0.5545796737766625, "grad_norm": 1.8084019422531128, "learning_rate": 4.536920789761286e-06, "loss": 1.3709, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 884 }, { "epoch": 0.555207026348808, "grad_norm": 1.7493175268173218, "learning_rate": 4.535332937713662e-06, "loss": 1.19, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 885 }, { "epoch": 0.5558343789209536, "grad_norm": 1.8383666276931763, "learning_rate": 4.533742646893086e-06, "loss": 1.259, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 886 }, { "epoch": 0.5564617314930991, "grad_norm": 2.549210786819458, "learning_rate": 4.5321499192050765e-06, "loss": 1.1625, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 887 }, { "epoch": 0.5570890840652447, "grad_norm": 1.8553715944290161, "learning_rate": 4.530554756558068e-06, "loss": 1.1967, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 888 }, { "epoch": 0.5577164366373902, "grad_norm": 2.863048553466797, "learning_rate": 4.528957160863412e-06, "loss": 1.1529, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 889 }, { "epoch": 0.5583437892095358, "grad_norm": 1.7921923398971558, "learning_rate": 4.527357134035379e-06, "loss": 1.3434, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 890 }, { "epoch": 0.5589711417816813, "grad_norm": 1.861366629600525, "learning_rate": 4.525754677991147e-06, "loss": 1.3188, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 891 }, { "epoch": 0.5595984943538268, "grad_norm": 1.7305035591125488, "learning_rate": 4.524149794650811e-06, "loss": 1.2743, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 892 }, { "epoch": 0.5602258469259724, "grad_norm": 1.790932059288025, "learning_rate": 4.522542485937369e-06, "loss": 1.2471, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 893 }, { "epoch": 0.560853199498118, "grad_norm": 1.6654341220855713, "learning_rate": 4.520932753776729e-06, "loss": 1.1872, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 894 }, { "epoch": 0.5614805520702635, "grad_norm": 1.4181909561157227, "learning_rate": 4.519320600097702e-06, "loss": 1.243, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 895 }, { "epoch": 0.562107904642409, "grad_norm": 1.785009741783142, "learning_rate": 4.5177060268319985e-06, "loss": 1.2753, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 896 }, { "epoch": 0.5627352572145545, "grad_norm": 1.4762773513793945, "learning_rate": 4.51608903591423e-06, "loss": 1.1472, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 897 }, { "epoch": 0.5633626097867002, "grad_norm": 1.899272084236145, "learning_rate": 4.514469629281905e-06, "loss": 1.2728, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 898 }, { "epoch": 0.5639899623588457, "grad_norm": 1.8159464597702026, "learning_rate": 4.512847808875424e-06, "loss": 1.3019, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 899 }, { "epoch": 0.5646173149309912, "grad_norm": 1.9190819263458252, "learning_rate": 4.511223576638084e-06, "loss": 1.3132, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 900 }, { "epoch": 0.5652446675031367, "grad_norm": 1.9054348468780518, "learning_rate": 4.5095969345160685e-06, "loss": 1.3536, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 901 }, { "epoch": 0.5658720200752823, "grad_norm": 1.8072419166564941, "learning_rate": 4.5079678844584505e-06, "loss": 1.3999, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 902 }, { "epoch": 0.5664993726474279, "grad_norm": 2.1858222484588623, "learning_rate": 4.5063364284171866e-06, "loss": 1.3366, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 903 }, { "epoch": 0.5671267252195734, "grad_norm": 1.7815707921981812, "learning_rate": 4.504702568347117e-06, "loss": 1.1586, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 904 }, { "epoch": 0.5677540777917189, "grad_norm": 1.7281264066696167, "learning_rate": 4.503066306205963e-06, "loss": 1.2503, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 905 }, { "epoch": 0.5683814303638645, "grad_norm": 1.8887368440628052, "learning_rate": 4.501427643954324e-06, "loss": 1.2161, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 906 }, { "epoch": 0.56900878293601, "grad_norm": 1.9003872871398926, "learning_rate": 4.499786583555675e-06, "loss": 1.2358, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 907 }, { "epoch": 0.5696361355081556, "grad_norm": 1.6889976263046265, "learning_rate": 4.498143126976362e-06, "loss": 1.2401, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 908 }, { "epoch": 0.5702634880803011, "grad_norm": 1.8518801927566528, "learning_rate": 4.4964972761856086e-06, "loss": 1.2482, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 909 }, { "epoch": 0.5708908406524467, "grad_norm": 1.7183051109313965, "learning_rate": 4.494849033155499e-06, "loss": 1.3633, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 910 }, { "epoch": 0.5715181932245922, "grad_norm": 1.5683485269546509, "learning_rate": 4.4931983998609915e-06, "loss": 1.2427, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 911 }, { "epoch": 0.5721455457967378, "grad_norm": 1.8224354982376099, "learning_rate": 4.491545378279903e-06, "loss": 1.2505, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 912 }, { "epoch": 0.5727728983688833, "grad_norm": 1.7461055517196655, "learning_rate": 4.489889970392915e-06, "loss": 1.1269, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 913 }, { "epoch": 0.5734002509410289, "grad_norm": 1.4676216840744019, "learning_rate": 4.4882321781835666e-06, "loss": 1.3263, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 914 }, { "epoch": 0.5740276035131744, "grad_norm": 1.9573190212249756, "learning_rate": 4.486572003638254e-06, "loss": 1.2915, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 915 }, { "epoch": 0.5746549560853199, "grad_norm": 1.5196433067321777, "learning_rate": 4.4849094487462305e-06, "loss": 1.236, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 916 }, { "epoch": 0.5752823086574655, "grad_norm": 1.8737750053405762, "learning_rate": 4.483244515499598e-06, "loss": 1.2711, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 917 }, { "epoch": 0.5759096612296111, "grad_norm": 1.7228468656539917, "learning_rate": 4.48157720589331e-06, "loss": 1.2358, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 918 }, { "epoch": 0.5765370138017566, "grad_norm": 2.034825325012207, "learning_rate": 4.479907521925168e-06, "loss": 1.2302, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 919 }, { "epoch": 0.5771643663739021, "grad_norm": 1.7857717275619507, "learning_rate": 4.478235465595817e-06, "loss": 1.4052, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 920 }, { "epoch": 0.5777917189460476, "grad_norm": 1.714264154434204, "learning_rate": 4.476561038908745e-06, "loss": 1.2664, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 921 }, { "epoch": 0.5784190715181933, "grad_norm": 1.6064285039901733, "learning_rate": 4.474884243870281e-06, "loss": 1.3298, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 922 }, { "epoch": 0.5790464240903388, "grad_norm": 1.78461492061615, "learning_rate": 4.473205082489592e-06, "loss": 1.3199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 923 }, { "epoch": 0.5796737766624843, "grad_norm": 1.6482230424880981, "learning_rate": 4.471523556778679e-06, "loss": 1.204, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 924 }, { "epoch": 0.5803011292346298, "grad_norm": 1.351148247718811, "learning_rate": 4.4698396687523765e-06, "loss": 1.1804, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 925 }, { "epoch": 0.5809284818067754, "grad_norm": 1.809248685836792, "learning_rate": 4.46815342042835e-06, "loss": 1.1648, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 926 }, { "epoch": 0.581555834378921, "grad_norm": 1.9116644859313965, "learning_rate": 4.466464813827093e-06, "loss": 1.3359, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 927 }, { "epoch": 0.5821831869510665, "grad_norm": 1.755806565284729, "learning_rate": 4.464773850971924e-06, "loss": 1.2192, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 928 }, { "epoch": 0.582810539523212, "grad_norm": 1.5529253482818604, "learning_rate": 4.463080533888987e-06, "loss": 1.2275, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 929 }, { "epoch": 0.5834378920953576, "grad_norm": 1.8902771472930908, "learning_rate": 4.461384864607243e-06, "loss": 1.2347, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 930 }, { "epoch": 0.5840652446675031, "grad_norm": 1.9781829118728638, "learning_rate": 4.459686845158476e-06, "loss": 1.2602, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 931 }, { "epoch": 0.5846925972396487, "grad_norm": 1.968216896057129, "learning_rate": 4.457986477577283e-06, "loss": 1.1981, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 932 }, { "epoch": 0.5853199498117942, "grad_norm": 1.5119919776916504, "learning_rate": 4.456283763901075e-06, "loss": 1.2146, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 933 }, { "epoch": 0.5859473023839398, "grad_norm": 1.8399906158447266, "learning_rate": 4.454578706170075e-06, "loss": 1.3099, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 934 }, { "epoch": 0.5865746549560853, "grad_norm": 1.8300646543502808, "learning_rate": 4.452871306427314e-06, "loss": 1.1918, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 935 }, { "epoch": 0.5872020075282308, "grad_norm": 1.4192575216293335, "learning_rate": 4.451161566718629e-06, "loss": 1.1847, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 936 }, { "epoch": 0.5878293601003765, "grad_norm": 1.9011478424072266, "learning_rate": 4.4494494890926616e-06, "loss": 1.3157, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 937 }, { "epoch": 0.588456712672522, "grad_norm": 1.544992208480835, "learning_rate": 4.447735075600855e-06, "loss": 1.246, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 938 }, { "epoch": 0.5890840652446675, "grad_norm": 2.110203266143799, "learning_rate": 4.446018328297449e-06, "loss": 1.3751, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 939 }, { "epoch": 0.589711417816813, "grad_norm": 1.8133139610290527, "learning_rate": 4.444299249239483e-06, "loss": 1.2828, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 940 }, { "epoch": 0.5903387703889585, "grad_norm": 1.8554705381393433, "learning_rate": 4.442577840486789e-06, "loss": 1.4578, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 941 }, { "epoch": 0.5909661229611042, "grad_norm": 1.9675077199935913, "learning_rate": 4.440854104101988e-06, "loss": 1.1713, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 942 }, { "epoch": 0.5915934755332497, "grad_norm": 4.033464431762695, "learning_rate": 4.439128042150495e-06, "loss": 1.3743, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 943 }, { "epoch": 0.5922208281053952, "grad_norm": 1.724401593208313, "learning_rate": 4.437399656700507e-06, "loss": 1.2972, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 944 }, { "epoch": 0.5928481806775407, "grad_norm": 1.488004446029663, "learning_rate": 4.435668949823008e-06, "loss": 1.2991, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 945 }, { "epoch": 0.5934755332496863, "grad_norm": 1.8987462520599365, "learning_rate": 4.433935923591763e-06, "loss": 1.3099, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 946 }, { "epoch": 0.5941028858218319, "grad_norm": 1.8244067430496216, "learning_rate": 4.432200580083312e-06, "loss": 1.4052, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 947 }, { "epoch": 0.5947302383939774, "grad_norm": 1.6409897804260254, "learning_rate": 4.43046292137698e-06, "loss": 1.2372, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 948 }, { "epoch": 0.595357590966123, "grad_norm": 1.771867275238037, "learning_rate": 4.428722949554858e-06, "loss": 1.2957, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 949 }, { "epoch": 0.5959849435382685, "grad_norm": 1.882083773612976, "learning_rate": 4.426980666701813e-06, "loss": 1.3752, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 950 }, { "epoch": 0.596612296110414, "grad_norm": 1.818285584449768, "learning_rate": 4.42523607490548e-06, "loss": 1.3053, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 951 }, { "epoch": 0.5972396486825596, "grad_norm": 1.6514558792114258, "learning_rate": 4.42348917625626e-06, "loss": 1.1995, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 952 }, { "epoch": 0.5978670012547052, "grad_norm": 1.948164701461792, "learning_rate": 4.42173997284732e-06, "loss": 1.2561, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 953 }, { "epoch": 0.5984943538268507, "grad_norm": 1.9465723037719727, "learning_rate": 4.4199884667745866e-06, "loss": 1.3594, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 954 }, { "epoch": 0.5991217063989962, "grad_norm": 1.5567758083343506, "learning_rate": 4.418234660136745e-06, "loss": 1.1343, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 955 }, { "epoch": 0.5997490589711418, "grad_norm": 1.8369762897491455, "learning_rate": 4.416478555035241e-06, "loss": 1.2743, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 956 }, { "epoch": 0.6003764115432874, "grad_norm": 1.8079118728637695, "learning_rate": 4.414720153574269e-06, "loss": 1.4227, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 957 }, { "epoch": 0.6010037641154329, "grad_norm": 1.668028712272644, "learning_rate": 4.412959457860779e-06, "loss": 1.4489, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 958 }, { "epoch": 0.6016311166875784, "grad_norm": 1.7507413625717163, "learning_rate": 4.4111964700044684e-06, "loss": 1.2152, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 959 }, { "epoch": 0.6022584692597239, "grad_norm": 1.9011902809143066, "learning_rate": 4.409431192117782e-06, "loss": 1.2223, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 960 }, { "epoch": 0.6028858218318696, "grad_norm": 2.240939140319824, "learning_rate": 4.407663626315907e-06, "loss": 1.2813, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 961 }, { "epoch": 0.6035131744040151, "grad_norm": 1.7186036109924316, "learning_rate": 4.405893774716772e-06, "loss": 1.1469, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 962 }, { "epoch": 0.6041405269761606, "grad_norm": 1.5372388362884521, "learning_rate": 4.404121639441047e-06, "loss": 1.2067, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 963 }, { "epoch": 0.6047678795483061, "grad_norm": 1.7399442195892334, "learning_rate": 4.402347222612137e-06, "loss": 1.1696, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 964 }, { "epoch": 0.6053952321204517, "grad_norm": 1.8909478187561035, "learning_rate": 4.400570526356178e-06, "loss": 1.292, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 965 }, { "epoch": 0.6060225846925973, "grad_norm": 1.7780414819717407, "learning_rate": 4.398791552802043e-06, "loss": 1.5044, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 966 }, { "epoch": 0.6066499372647428, "grad_norm": 1.9680545330047607, "learning_rate": 4.397010304081328e-06, "loss": 1.3634, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 967 }, { "epoch": 0.6072772898368883, "grad_norm": 1.8577983379364014, "learning_rate": 4.39522678232836e-06, "loss": 1.2081, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 968 }, { "epoch": 0.6079046424090339, "grad_norm": 2.26708984375, "learning_rate": 4.393440989680184e-06, "loss": 1.3962, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 969 }, { "epoch": 0.6085319949811794, "grad_norm": 1.7874890565872192, "learning_rate": 4.391652928276572e-06, "loss": 1.2756, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 970 }, { "epoch": 0.609159347553325, "grad_norm": 2.3963043689727783, "learning_rate": 4.38986260026001e-06, "loss": 1.1627, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 971 }, { "epoch": 0.6097867001254705, "grad_norm": 1.408721685409546, "learning_rate": 4.388070007775703e-06, "loss": 1.1634, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 972 }, { "epoch": 0.6104140526976161, "grad_norm": 1.7340478897094727, "learning_rate": 4.3862751529715674e-06, "loss": 1.2605, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 973 }, { "epoch": 0.6110414052697616, "grad_norm": 1.6462897062301636, "learning_rate": 4.38447803799823e-06, "loss": 1.1403, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 974 }, { "epoch": 0.6116687578419071, "grad_norm": 1.8389941453933716, "learning_rate": 4.382678665009028e-06, "loss": 1.2517, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 975 }, { "epoch": 0.6122961104140527, "grad_norm": 1.944242238998413, "learning_rate": 4.380877036160002e-06, "loss": 1.2953, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 976 }, { "epoch": 0.6129234629861983, "grad_norm": 1.7658624649047852, "learning_rate": 4.379073153609896e-06, "loss": 1.2436, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 977 }, { "epoch": 0.6135508155583438, "grad_norm": 2.0299324989318848, "learning_rate": 4.3772670195201565e-06, "loss": 1.4657, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 978 }, { "epoch": 0.6141781681304893, "grad_norm": 1.6620166301727295, "learning_rate": 4.375458636054924e-06, "loss": 1.2166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 979 }, { "epoch": 0.6148055207026348, "grad_norm": 2.2326536178588867, "learning_rate": 4.373648005381039e-06, "loss": 1.234, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 980 }, { "epoch": 0.6154328732747805, "grad_norm": 1.7606323957443237, "learning_rate": 4.37183512966803e-06, "loss": 1.3452, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 981 }, { "epoch": 0.616060225846926, "grad_norm": 1.1604722738265991, "learning_rate": 4.37002001108812e-06, "loss": 1.3058, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 982 }, { "epoch": 0.6166875784190715, "grad_norm": 1.894524097442627, "learning_rate": 4.368202651816213e-06, "loss": 1.3107, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 983 }, { "epoch": 0.617314930991217, "grad_norm": 1.7152777910232544, "learning_rate": 4.366383054029907e-06, "loss": 1.4284, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 984 }, { "epoch": 0.6179422835633626, "grad_norm": 1.7929563522338867, "learning_rate": 4.364561219909474e-06, "loss": 1.3094, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 985 }, { "epoch": 0.6185696361355082, "grad_norm": 2.654740571975708, "learning_rate": 4.3627371516378695e-06, "loss": 1.2025, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 986 }, { "epoch": 0.6191969887076537, "grad_norm": 1.9211310148239136, "learning_rate": 4.360910851400725e-06, "loss": 1.2593, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 987 }, { "epoch": 0.6198243412797992, "grad_norm": 1.8810856342315674, "learning_rate": 4.359082321386346e-06, "loss": 1.2612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 988 }, { "epoch": 0.6204516938519448, "grad_norm": 1.736060380935669, "learning_rate": 4.357251563785712e-06, "loss": 1.2275, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 989 }, { "epoch": 0.6210790464240903, "grad_norm": 1.788694977760315, "learning_rate": 4.355418580792471e-06, "loss": 1.2669, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 990 }, { "epoch": 0.6217063989962359, "grad_norm": 1.666709542274475, "learning_rate": 4.3535833746029335e-06, "loss": 1.3095, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 991 }, { "epoch": 0.6223337515683814, "grad_norm": 1.8080217838287354, "learning_rate": 4.351745947416079e-06, "loss": 1.2086, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 992 }, { "epoch": 0.622961104140527, "grad_norm": 1.7072173357009888, "learning_rate": 4.349906301433545e-06, "loss": 1.2606, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 993 }, { "epoch": 0.6235884567126725, "grad_norm": 1.833138346672058, "learning_rate": 4.348064438859629e-06, "loss": 1.3743, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 994 }, { "epoch": 0.624215809284818, "grad_norm": 1.8404881954193115, "learning_rate": 4.346220361901286e-06, "loss": 1.1828, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 995 }, { "epoch": 0.6248431618569636, "grad_norm": 1.721911907196045, "learning_rate": 4.34437407276812e-06, "loss": 1.24, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 996 }, { "epoch": 0.6254705144291092, "grad_norm": 1.9910308122634888, "learning_rate": 4.342525573672391e-06, "loss": 1.2866, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 997 }, { "epoch": 0.6260978670012547, "grad_norm": 1.7354001998901367, "learning_rate": 4.340674866829001e-06, "loss": 1.2425, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 998 }, { "epoch": 0.6267252195734002, "grad_norm": 1.937098503112793, "learning_rate": 4.3388219544555035e-06, "loss": 1.2484, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 999 }, { "epoch": 0.6273525721455459, "grad_norm": 1.6897847652435303, "learning_rate": 4.3369668387720895e-06, "loss": 1.2683, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1000 }, { "epoch": 0.6279799247176914, "grad_norm": 1.848845362663269, "learning_rate": 4.335109522001594e-06, "loss": 1.3694, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1001 }, { "epoch": 0.6286072772898369, "grad_norm": 1.9161001443862915, "learning_rate": 4.333250006369487e-06, "loss": 1.3237, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1002 }, { "epoch": 0.6292346298619824, "grad_norm": 1.8927891254425049, "learning_rate": 4.331388294103874e-06, "loss": 1.3337, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1003 }, { "epoch": 0.6298619824341279, "grad_norm": 1.8695149421691895, "learning_rate": 4.329524387435493e-06, "loss": 1.2412, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1004 }, { "epoch": 0.6304893350062736, "grad_norm": 1.4502034187316895, "learning_rate": 4.32765828859771e-06, "loss": 1.2586, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1005 }, { "epoch": 0.6311166875784191, "grad_norm": 1.9272611141204834, "learning_rate": 4.3257899998265195e-06, "loss": 1.1892, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1006 }, { "epoch": 0.6317440401505646, "grad_norm": 1.830952763557434, "learning_rate": 4.323919523360539e-06, "loss": 1.408, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1007 }, { "epoch": 0.6323713927227101, "grad_norm": 1.6106399297714233, "learning_rate": 4.322046861441006e-06, "loss": 1.4208, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1008 }, { "epoch": 0.6329987452948557, "grad_norm": 1.7375121116638184, "learning_rate": 4.32017201631178e-06, "loss": 1.35, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1009 }, { "epoch": 0.6336260978670013, "grad_norm": 1.7395000457763672, "learning_rate": 4.318294990219334e-06, "loss": 1.3972, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1010 }, { "epoch": 0.6342534504391468, "grad_norm": 1.8794714212417603, "learning_rate": 4.316415785412754e-06, "loss": 1.232, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1011 }, { "epoch": 0.6348808030112923, "grad_norm": 1.9169164896011353, "learning_rate": 4.314534404143738e-06, "loss": 1.3077, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1012 }, { "epoch": 0.6355081555834379, "grad_norm": 1.6044981479644775, "learning_rate": 4.312650848666591e-06, "loss": 1.3272, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1013 }, { "epoch": 0.6361355081555834, "grad_norm": 1.8915094137191772, "learning_rate": 4.310765121238223e-06, "loss": 1.264, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1014 }, { "epoch": 0.636762860727729, "grad_norm": 1.7705018520355225, "learning_rate": 4.308877224118148e-06, "loss": 1.3933, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1015 }, { "epoch": 0.6373902132998746, "grad_norm": 1.880327582359314, "learning_rate": 4.3069871595684795e-06, "loss": 1.2974, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1016 }, { "epoch": 0.6380175658720201, "grad_norm": 1.5655988454818726, "learning_rate": 4.305094929853925e-06, "loss": 1.2287, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1017 }, { "epoch": 0.6386449184441656, "grad_norm": 1.868777871131897, "learning_rate": 4.303200537241789e-06, "loss": 1.3101, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1018 }, { "epoch": 0.6392722710163111, "grad_norm": 1.8370649814605713, "learning_rate": 4.3013039840019675e-06, "loss": 1.2703, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1019 }, { "epoch": 0.6398996235884568, "grad_norm": 1.7365206480026245, "learning_rate": 4.299405272406946e-06, "loss": 1.2183, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1020 }, { "epoch": 0.6405269761606023, "grad_norm": 1.7789405584335327, "learning_rate": 4.297504404731794e-06, "loss": 1.2166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1021 }, { "epoch": 0.6411543287327478, "grad_norm": 1.79762601852417, "learning_rate": 4.295601383254166e-06, "loss": 1.2795, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1022 }, { "epoch": 0.6417816813048933, "grad_norm": 2.0368082523345947, "learning_rate": 4.293696210254299e-06, "loss": 1.1092, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1023 }, { "epoch": 0.6424090338770388, "grad_norm": 1.8230146169662476, "learning_rate": 4.291788888015002e-06, "loss": 1.3562, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1024 }, { "epoch": 0.6430363864491845, "grad_norm": 1.6714571714401245, "learning_rate": 4.289879418821667e-06, "loss": 1.2922, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1025 }, { "epoch": 0.64366373902133, "grad_norm": 1.827463150024414, "learning_rate": 4.287967804962252e-06, "loss": 1.1723, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1026 }, { "epoch": 0.6442910915934755, "grad_norm": 1.7430213689804077, "learning_rate": 4.286054048727289e-06, "loss": 1.3563, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1027 }, { "epoch": 0.644918444165621, "grad_norm": 1.9313528537750244, "learning_rate": 4.284138152409875e-06, "loss": 1.328, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1028 }, { "epoch": 0.6455457967377666, "grad_norm": 1.788396954536438, "learning_rate": 4.282220118305672e-06, "loss": 1.2578, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1029 }, { "epoch": 0.6461731493099122, "grad_norm": 1.9158381223678589, "learning_rate": 4.2802999487129025e-06, "loss": 1.2759, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1030 }, { "epoch": 0.6468005018820577, "grad_norm": 1.5709127187728882, "learning_rate": 4.278377645932351e-06, "loss": 1.2669, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1031 }, { "epoch": 0.6474278544542033, "grad_norm": 1.917354941368103, "learning_rate": 4.276453212267353e-06, "loss": 1.3398, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1032 }, { "epoch": 0.6480552070263488, "grad_norm": 1.9424017667770386, "learning_rate": 4.274526650023801e-06, "loss": 1.1625, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1033 }, { "epoch": 0.6486825595984943, "grad_norm": 1.6027320623397827, "learning_rate": 4.272597961510137e-06, "loss": 1.3137, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1034 }, { "epoch": 0.6493099121706399, "grad_norm": 1.4474650621414185, "learning_rate": 4.2706671490373505e-06, "loss": 1.2254, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1035 }, { "epoch": 0.6499372647427855, "grad_norm": 1.7298493385314941, "learning_rate": 4.268734214918977e-06, "loss": 1.2897, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1036 }, { "epoch": 0.650564617314931, "grad_norm": 1.4415090084075928, "learning_rate": 4.266799161471093e-06, "loss": 1.1688, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1037 }, { "epoch": 0.6511919698870765, "grad_norm": 2.0625054836273193, "learning_rate": 4.264861991012312e-06, "loss": 1.3359, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1038 }, { "epoch": 0.651819322459222, "grad_norm": 1.8560051918029785, "learning_rate": 4.262922705863791e-06, "loss": 1.3139, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1039 }, { "epoch": 0.6524466750313677, "grad_norm": 1.620300054550171, "learning_rate": 4.260981308349214e-06, "loss": 1.1617, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1040 }, { "epoch": 0.6530740276035132, "grad_norm": 1.65470552444458, "learning_rate": 4.2590378007948e-06, "loss": 1.1033, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1041 }, { "epoch": 0.6537013801756587, "grad_norm": 2.6354520320892334, "learning_rate": 4.257092185529295e-06, "loss": 1.3497, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1042 }, { "epoch": 0.6543287327478042, "grad_norm": 1.8058476448059082, "learning_rate": 4.25514446488397e-06, "loss": 1.304, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1043 }, { "epoch": 0.6549560853199499, "grad_norm": 2.0276644229888916, "learning_rate": 4.253194641192621e-06, "loss": 1.3475, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1044 }, { "epoch": 0.6555834378920954, "grad_norm": 1.907875895500183, "learning_rate": 4.2512427167915595e-06, "loss": 1.4468, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1045 }, { "epoch": 0.6562107904642409, "grad_norm": 1.8480969667434692, "learning_rate": 4.249288694019618e-06, "loss": 1.1998, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1046 }, { "epoch": 0.6568381430363864, "grad_norm": 1.6065888404846191, "learning_rate": 4.247332575218144e-06, "loss": 1.2323, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1047 }, { "epoch": 0.657465495608532, "grad_norm": 1.8290810585021973, "learning_rate": 4.245374362730992e-06, "loss": 1.2553, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1048 }, { "epoch": 0.6580928481806776, "grad_norm": 1.9927012920379639, "learning_rate": 4.2434140589045286e-06, "loss": 1.1855, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1049 }, { "epoch": 0.6587202007528231, "grad_norm": 1.6697808504104614, "learning_rate": 4.241451666087626e-06, "loss": 1.2958, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1050 }, { "epoch": 0.6593475533249686, "grad_norm": 1.8916678428649902, "learning_rate": 4.239487186631659e-06, "loss": 1.1373, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1051 }, { "epoch": 0.6599749058971142, "grad_norm": 1.8192722797393799, "learning_rate": 4.237520622890503e-06, "loss": 1.2844, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1052 }, { "epoch": 0.6606022584692597, "grad_norm": 1.9123562574386597, "learning_rate": 4.235551977220529e-06, "loss": 1.3009, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1053 }, { "epoch": 0.6612296110414053, "grad_norm": 1.4059549570083618, "learning_rate": 4.233581251980604e-06, "loss": 1.2611, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1054 }, { "epoch": 0.6618569636135508, "grad_norm": 1.9050414562225342, "learning_rate": 4.23160844953209e-06, "loss": 1.3481, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1055 }, { "epoch": 0.6624843161856964, "grad_norm": 1.5960050821304321, "learning_rate": 4.229633572238831e-06, "loss": 1.1204, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1056 }, { "epoch": 0.6631116687578419, "grad_norm": 1.4794005155563354, "learning_rate": 4.227656622467162e-06, "loss": 1.1655, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1057 }, { "epoch": 0.6637390213299874, "grad_norm": 1.9069862365722656, "learning_rate": 4.225677602585901e-06, "loss": 1.321, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1058 }, { "epoch": 0.664366373902133, "grad_norm": 1.4333158731460571, "learning_rate": 4.223696514966346e-06, "loss": 1.23, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1059 }, { "epoch": 0.6649937264742786, "grad_norm": 1.650801658630371, "learning_rate": 4.2217133619822705e-06, "loss": 1.2287, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1060 }, { "epoch": 0.6656210790464241, "grad_norm": 2.9155123233795166, "learning_rate": 4.2197281460099245e-06, "loss": 1.209, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1061 }, { "epoch": 0.6662484316185696, "grad_norm": 1.8751521110534668, "learning_rate": 4.217740869428032e-06, "loss": 1.3401, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1062 }, { "epoch": 0.6668757841907151, "grad_norm": 1.9066846370697021, "learning_rate": 4.215751534617783e-06, "loss": 1.28, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1063 }, { "epoch": 0.6675031367628608, "grad_norm": 1.853600263595581, "learning_rate": 4.213760143962834e-06, "loss": 1.36, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1064 }, { "epoch": 0.6681304893350063, "grad_norm": 1.5284377336502075, "learning_rate": 4.211766699849306e-06, "loss": 1.243, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1065 }, { "epoch": 0.6687578419071518, "grad_norm": 1.8650940656661987, "learning_rate": 4.209771204665779e-06, "loss": 1.2, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1066 }, { "epoch": 0.6693851944792973, "grad_norm": 1.7665506601333618, "learning_rate": 4.2077736608032925e-06, "loss": 1.1567, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1067 }, { "epoch": 0.6700125470514429, "grad_norm": 1.9517008066177368, "learning_rate": 4.2057740706553415e-06, "loss": 1.3147, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1068 }, { "epoch": 0.6706398996235885, "grad_norm": 1.7581521272659302, "learning_rate": 4.203772436617868e-06, "loss": 1.427, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1069 }, { "epoch": 0.671267252195734, "grad_norm": 1.9802473783493042, "learning_rate": 4.201768761089269e-06, "loss": 1.3791, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1070 }, { "epoch": 0.6718946047678795, "grad_norm": 1.914982795715332, "learning_rate": 4.199763046470384e-06, "loss": 1.3415, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1071 }, { "epoch": 0.6725219573400251, "grad_norm": 1.864111065864563, "learning_rate": 4.197755295164496e-06, "loss": 1.3969, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1072 }, { "epoch": 0.6731493099121706, "grad_norm": 1.913025975227356, "learning_rate": 4.19574550957733e-06, "loss": 1.2561, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1073 }, { "epoch": 0.6737766624843162, "grad_norm": 1.9390740394592285, "learning_rate": 4.193733692117048e-06, "loss": 1.2471, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1074 }, { "epoch": 0.6744040150564617, "grad_norm": 1.7494572401046753, "learning_rate": 4.191719845194246e-06, "loss": 1.1353, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1075 }, { "epoch": 0.6750313676286073, "grad_norm": 1.8655979633331299, "learning_rate": 4.1897039712219514e-06, "loss": 1.3104, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1076 }, { "epoch": 0.6756587202007528, "grad_norm": 1.9206185340881348, "learning_rate": 4.187686072615621e-06, "loss": 1.3032, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1077 }, { "epoch": 0.6762860727728983, "grad_norm": 1.896394968032837, "learning_rate": 4.185666151793139e-06, "loss": 1.4611, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1078 }, { "epoch": 0.676913425345044, "grad_norm": 1.8265060186386108, "learning_rate": 4.1836442111748086e-06, "loss": 1.2732, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1079 }, { "epoch": 0.6775407779171895, "grad_norm": 1.7321994304656982, "learning_rate": 4.181620253183359e-06, "loss": 1.233, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1080 }, { "epoch": 0.678168130489335, "grad_norm": 1.9845069646835327, "learning_rate": 4.179594280243932e-06, "loss": 1.2736, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1081 }, { "epoch": 0.6787954830614805, "grad_norm": 1.7132633924484253, "learning_rate": 4.177566294784085e-06, "loss": 1.4785, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1082 }, { "epoch": 0.679422835633626, "grad_norm": 1.8451826572418213, "learning_rate": 4.175536299233788e-06, "loss": 1.207, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1083 }, { "epoch": 0.6800501882057717, "grad_norm": 1.9522786140441895, "learning_rate": 4.173504296025417e-06, "loss": 1.1463, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1084 }, { "epoch": 0.6806775407779172, "grad_norm": 1.8040053844451904, "learning_rate": 4.171470287593757e-06, "loss": 1.3937, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1085 }, { "epoch": 0.6813048933500627, "grad_norm": 1.8072997331619263, "learning_rate": 4.169434276375992e-06, "loss": 1.258, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1086 }, { "epoch": 0.6819322459222082, "grad_norm": 1.5943018198013306, "learning_rate": 4.167396264811709e-06, "loss": 1.2709, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1087 }, { "epoch": 0.6825595984943539, "grad_norm": 1.5378663539886475, "learning_rate": 4.165356255342892e-06, "loss": 1.2698, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1088 }, { "epoch": 0.6831869510664994, "grad_norm": 1.8408434391021729, "learning_rate": 4.163314250413913e-06, "loss": 1.3284, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1089 }, { "epoch": 0.6838143036386449, "grad_norm": 2.0114541053771973, "learning_rate": 4.1612702524715445e-06, "loss": 1.2577, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1090 }, { "epoch": 0.6844416562107905, "grad_norm": 1.7099038362503052, "learning_rate": 4.159224263964939e-06, "loss": 1.2858, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1091 }, { "epoch": 0.685069008782936, "grad_norm": 1.7164630889892578, "learning_rate": 4.1571762873456376e-06, "loss": 1.3028, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1092 }, { "epoch": 0.6856963613550816, "grad_norm": 1.8126477003097534, "learning_rate": 4.1551263250675635e-06, "loss": 1.283, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1093 }, { "epoch": 0.6863237139272271, "grad_norm": 1.8203259706497192, "learning_rate": 4.153074379587018e-06, "loss": 1.4703, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1094 }, { "epoch": 0.6869510664993727, "grad_norm": 1.6437162160873413, "learning_rate": 4.151020453362681e-06, "loss": 1.3509, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1095 }, { "epoch": 0.6875784190715182, "grad_norm": 2.030787944793701, "learning_rate": 4.148964548855603e-06, "loss": 1.2699, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1096 }, { "epoch": 0.6882057716436637, "grad_norm": 1.8610453605651855, "learning_rate": 4.146906668529207e-06, "loss": 1.2309, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1097 }, { "epoch": 0.6888331242158093, "grad_norm": 1.8488279581069946, "learning_rate": 4.144846814849282e-06, "loss": 1.289, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1098 }, { "epoch": 0.6894604767879549, "grad_norm": 1.969459891319275, "learning_rate": 4.1427849902839826e-06, "loss": 1.2552, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1099 }, { "epoch": 0.6900878293601004, "grad_norm": 1.5772993564605713, "learning_rate": 4.1407211973038245e-06, "loss": 1.2722, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1100 }, { "epoch": 0.6907151819322459, "grad_norm": 1.722180962562561, "learning_rate": 4.138655438381681e-06, "loss": 1.3136, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1101 }, { "epoch": 0.6913425345043914, "grad_norm": 1.7175124883651733, "learning_rate": 4.1365877159927835e-06, "loss": 1.2704, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1102 }, { "epoch": 0.6919698870765371, "grad_norm": 1.7393913269042969, "learning_rate": 4.134518032614713e-06, "loss": 1.3046, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1103 }, { "epoch": 0.6925972396486826, "grad_norm": 1.930572509765625, "learning_rate": 4.1324463907274035e-06, "loss": 1.4181, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1104 }, { "epoch": 0.6932245922208281, "grad_norm": 1.9321399927139282, "learning_rate": 4.130372792813133e-06, "loss": 1.3217, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1105 }, { "epoch": 0.6938519447929736, "grad_norm": 1.7585630416870117, "learning_rate": 4.128297241356525e-06, "loss": 1.1922, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1106 }, { "epoch": 0.6944792973651192, "grad_norm": 1.8687766790390015, "learning_rate": 4.126219738844542e-06, "loss": 1.3053, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1107 }, { "epoch": 0.6951066499372648, "grad_norm": 1.7932958602905273, "learning_rate": 4.1241402877664845e-06, "loss": 1.3013, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1108 }, { "epoch": 0.6957340025094103, "grad_norm": 1.5376317501068115, "learning_rate": 4.122058890613991e-06, "loss": 1.3376, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1109 }, { "epoch": 0.6963613550815558, "grad_norm": 1.9468011856079102, "learning_rate": 4.119975549881029e-06, "loss": 1.3899, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1110 }, { "epoch": 0.6969887076537014, "grad_norm": 3.2425899505615234, "learning_rate": 4.117890268063894e-06, "loss": 1.2131, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1111 }, { "epoch": 0.6976160602258469, "grad_norm": 1.381169080734253, "learning_rate": 4.1158030476612085e-06, "loss": 1.2724, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1112 }, { "epoch": 0.6982434127979925, "grad_norm": 1.7302320003509521, "learning_rate": 4.1137138911739185e-06, "loss": 1.2668, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1113 }, { "epoch": 0.698870765370138, "grad_norm": 1.9618453979492188, "learning_rate": 4.11162280110529e-06, "loss": 1.4453, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1114 }, { "epoch": 0.6994981179422836, "grad_norm": 1.8300155401229858, "learning_rate": 4.109529779960905e-06, "loss": 1.3275, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1115 }, { "epoch": 0.7001254705144291, "grad_norm": 1.7978266477584839, "learning_rate": 4.107434830248658e-06, "loss": 1.2542, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1116 }, { "epoch": 0.7007528230865746, "grad_norm": 1.8951441049575806, "learning_rate": 4.105337954478756e-06, "loss": 1.1943, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1117 }, { "epoch": 0.7013801756587202, "grad_norm": 1.8222427368164062, "learning_rate": 4.103239155163718e-06, "loss": 1.191, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1118 }, { "epoch": 0.7020075282308658, "grad_norm": 1.821305513381958, "learning_rate": 4.1011384348183565e-06, "loss": 1.1811, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1119 }, { "epoch": 0.7026348808030113, "grad_norm": 1.656417965888977, "learning_rate": 4.099035795959798e-06, "loss": 1.2397, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1120 }, { "epoch": 0.7032622333751568, "grad_norm": 1.4519686698913574, "learning_rate": 4.096931241107461e-06, "loss": 1.2223, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1121 }, { "epoch": 0.7038895859473023, "grad_norm": 1.6120268106460571, "learning_rate": 4.094824772783062e-06, "loss": 1.2453, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1122 }, { "epoch": 0.704516938519448, "grad_norm": 1.7454333305358887, "learning_rate": 4.092716393510609e-06, "loss": 1.3674, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1123 }, { "epoch": 0.7051442910915935, "grad_norm": 1.7884482145309448, "learning_rate": 4.0906061058164e-06, "loss": 1.2876, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1124 }, { "epoch": 0.705771643663739, "grad_norm": 1.7549852132797241, "learning_rate": 4.0884939122290215e-06, "loss": 1.2451, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1125 }, { "epoch": 0.7063989962358845, "grad_norm": 1.5388882160186768, "learning_rate": 4.08637981527934e-06, "loss": 1.2364, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1126 }, { "epoch": 0.7070263488080301, "grad_norm": 1.7616428136825562, "learning_rate": 4.084263817500508e-06, "loss": 1.2736, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1127 }, { "epoch": 0.7076537013801757, "grad_norm": 1.576174259185791, "learning_rate": 4.082145921427949e-06, "loss": 1.2303, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1128 }, { "epoch": 0.7082810539523212, "grad_norm": 1.961186170578003, "learning_rate": 4.080026129599368e-06, "loss": 1.2455, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1129 }, { "epoch": 0.7089084065244667, "grad_norm": 1.8993115425109863, "learning_rate": 4.077904444554736e-06, "loss": 1.1639, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1130 }, { "epoch": 0.7095357590966123, "grad_norm": 1.7968101501464844, "learning_rate": 4.075780868836296e-06, "loss": 1.3066, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1131 }, { "epoch": 0.7101631116687579, "grad_norm": 2.5451998710632324, "learning_rate": 4.073655404988554e-06, "loss": 1.2309, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1132 }, { "epoch": 0.7107904642409034, "grad_norm": 1.7790062427520752, "learning_rate": 4.071528055558278e-06, "loss": 1.2954, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1133 }, { "epoch": 0.7114178168130489, "grad_norm": 1.8270624876022339, "learning_rate": 4.0693988230945e-06, "loss": 1.1384, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1134 }, { "epoch": 0.7120451693851945, "grad_norm": 1.8887920379638672, "learning_rate": 4.067267710148504e-06, "loss": 1.2272, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1135 }, { "epoch": 0.71267252195734, "grad_norm": 1.9454110860824585, "learning_rate": 4.065134719273828e-06, "loss": 1.3557, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1136 }, { "epoch": 0.7132998745294856, "grad_norm": 1.7404446601867676, "learning_rate": 4.062999853026259e-06, "loss": 1.2289, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1137 }, { "epoch": 0.7139272271016311, "grad_norm": 1.4533557891845703, "learning_rate": 4.060863113963835e-06, "loss": 1.2886, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1138 }, { "epoch": 0.7145545796737767, "grad_norm": 1.7258976697921753, "learning_rate": 4.058724504646834e-06, "loss": 1.2859, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1139 }, { "epoch": 0.7151819322459222, "grad_norm": 1.8206146955490112, "learning_rate": 4.056584027637778e-06, "loss": 1.3065, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1140 }, { "epoch": 0.7158092848180677, "grad_norm": 1.9629409313201904, "learning_rate": 4.054441685501423e-06, "loss": 1.3129, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1141 }, { "epoch": 0.7164366373902133, "grad_norm": 1.4002165794372559, "learning_rate": 4.0522974808047655e-06, "loss": 1.247, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1142 }, { "epoch": 0.7170639899623589, "grad_norm": 2.054499387741089, "learning_rate": 4.050151416117028e-06, "loss": 1.3253, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1143 }, { "epoch": 0.7176913425345044, "grad_norm": 1.6483572721481323, "learning_rate": 4.048003494009666e-06, "loss": 1.1892, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1144 }, { "epoch": 0.7183186951066499, "grad_norm": 1.6913396120071411, "learning_rate": 4.045853717056358e-06, "loss": 1.2009, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1145 }, { "epoch": 0.7189460476787954, "grad_norm": 1.7624170780181885, "learning_rate": 4.043702087833006e-06, "loss": 1.2018, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1146 }, { "epoch": 0.7195734002509411, "grad_norm": 2.329902410507202, "learning_rate": 4.04154860891773e-06, "loss": 1.161, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1147 }, { "epoch": 0.7202007528230866, "grad_norm": 1.9451358318328857, "learning_rate": 4.03939328289087e-06, "loss": 1.2845, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1148 }, { "epoch": 0.7208281053952321, "grad_norm": 2.0189058780670166, "learning_rate": 4.037236112334976e-06, "loss": 1.2569, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1149 }, { "epoch": 0.7214554579673776, "grad_norm": 1.3586088418960571, "learning_rate": 4.0350770998348075e-06, "loss": 1.1772, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1150 }, { "epoch": 0.7220828105395232, "grad_norm": 1.76394784450531, "learning_rate": 4.032916247977334e-06, "loss": 1.2178, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1151 }, { "epoch": 0.7227101631116688, "grad_norm": 1.484285593032837, "learning_rate": 4.030753559351728e-06, "loss": 1.1872, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1152 }, { "epoch": 0.7233375156838143, "grad_norm": 1.5376477241516113, "learning_rate": 4.028589036549361e-06, "loss": 1.2631, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1153 }, { "epoch": 0.7239648682559598, "grad_norm": 2.0866265296936035, "learning_rate": 4.026422682163804e-06, "loss": 1.2962, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1154 }, { "epoch": 0.7245922208281054, "grad_norm": 1.7734241485595703, "learning_rate": 4.0242544987908235e-06, "loss": 1.3571, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1155 }, { "epoch": 0.7252195734002509, "grad_norm": 1.6300272941589355, "learning_rate": 4.022084489028375e-06, "loss": 1.2431, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1156 }, { "epoch": 0.7258469259723965, "grad_norm": 1.834957480430603, "learning_rate": 4.019912655476603e-06, "loss": 1.3553, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1157 }, { "epoch": 0.726474278544542, "grad_norm": 1.8868942260742188, "learning_rate": 4.017739000737839e-06, "loss": 1.2373, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1158 }, { "epoch": 0.7271016311166876, "grad_norm": 1.9034756422042847, "learning_rate": 4.015563527416596e-06, "loss": 1.3091, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1159 }, { "epoch": 0.7277289836888331, "grad_norm": 1.6866358518600464, "learning_rate": 4.0133862381195645e-06, "loss": 1.1822, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1160 }, { "epoch": 0.7283563362609786, "grad_norm": 1.3602663278579712, "learning_rate": 4.011207135455612e-06, "loss": 1.1083, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1161 }, { "epoch": 0.7289836888331243, "grad_norm": 1.928031325340271, "learning_rate": 4.009026222035782e-06, "loss": 1.2096, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1162 }, { "epoch": 0.7296110414052698, "grad_norm": 1.8566958904266357, "learning_rate": 4.006843500473281e-06, "loss": 1.3159, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1163 }, { "epoch": 0.7302383939774153, "grad_norm": 1.9040616750717163, "learning_rate": 4.0046589733834875e-06, "loss": 1.3272, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1164 }, { "epoch": 0.7308657465495608, "grad_norm": 1.490311861038208, "learning_rate": 4.002472643383941e-06, "loss": 1.3552, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1165 }, { "epoch": 0.7314930991217063, "grad_norm": 1.6385480165481567, "learning_rate": 4.000284513094342e-06, "loss": 1.2069, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1166 }, { "epoch": 0.732120451693852, "grad_norm": 2.097144603729248, "learning_rate": 3.9980945851365485e-06, "loss": 1.2088, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1167 }, { "epoch": 0.7327478042659975, "grad_norm": 1.6262550354003906, "learning_rate": 3.99590286213457e-06, "loss": 1.4293, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1168 }, { "epoch": 0.733375156838143, "grad_norm": 1.6071372032165527, "learning_rate": 3.9937093467145725e-06, "loss": 1.0753, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1169 }, { "epoch": 0.7340025094102886, "grad_norm": 1.8095484972000122, "learning_rate": 3.991514041504863e-06, "loss": 1.3106, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1170 }, { "epoch": 0.7346298619824341, "grad_norm": 1.8286887407302856, "learning_rate": 3.989316949135898e-06, "loss": 1.2928, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1171 }, { "epoch": 0.7352572145545797, "grad_norm": 1.8071660995483398, "learning_rate": 3.987118072240272e-06, "loss": 1.2948, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1172 }, { "epoch": 0.7358845671267252, "grad_norm": 1.8113529682159424, "learning_rate": 3.984917413452721e-06, "loss": 1.3549, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1173 }, { "epoch": 0.7365119196988708, "grad_norm": 1.5373753309249878, "learning_rate": 3.982714975410111e-06, "loss": 1.1386, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1174 }, { "epoch": 0.7371392722710163, "grad_norm": 1.5793862342834473, "learning_rate": 3.980510760751447e-06, "loss": 1.2817, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1175 }, { "epoch": 0.7377666248431619, "grad_norm": 1.647489070892334, "learning_rate": 3.978304772117859e-06, "loss": 1.2604, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1176 }, { "epoch": 0.7383939774153074, "grad_norm": 1.910098910331726, "learning_rate": 3.9760970121526e-06, "loss": 1.2902, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1177 }, { "epoch": 0.739021329987453, "grad_norm": 2.1126773357391357, "learning_rate": 3.973887483501051e-06, "loss": 1.2406, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1178 }, { "epoch": 0.7396486825595985, "grad_norm": 1.8298122882843018, "learning_rate": 3.971676188810707e-06, "loss": 1.2546, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1179 }, { "epoch": 0.740276035131744, "grad_norm": 1.6926147937774658, "learning_rate": 3.969463130731183e-06, "loss": 1.2602, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1180 }, { "epoch": 0.7409033877038896, "grad_norm": 2.075894832611084, "learning_rate": 3.9672483119142055e-06, "loss": 1.2015, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1181 }, { "epoch": 0.7415307402760352, "grad_norm": 1.5787642002105713, "learning_rate": 3.96503173501361e-06, "loss": 1.2999, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1182 }, { "epoch": 0.7421580928481807, "grad_norm": 1.943341612815857, "learning_rate": 3.962813402685339e-06, "loss": 1.2898, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1183 }, { "epoch": 0.7427854454203262, "grad_norm": 1.6585906744003296, "learning_rate": 3.96059331758744e-06, "loss": 1.3, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1184 }, { "epoch": 0.7434127979924717, "grad_norm": 1.8577035665512085, "learning_rate": 3.9583714823800575e-06, "loss": 1.3885, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1185 }, { "epoch": 0.7440401505646174, "grad_norm": 1.5204012393951416, "learning_rate": 3.9561478997254375e-06, "loss": 1.1612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1186 }, { "epoch": 0.7446675031367629, "grad_norm": 1.1847776174545288, "learning_rate": 3.953922572287915e-06, "loss": 1.1716, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1187 }, { "epoch": 0.7452948557089084, "grad_norm": 2.699477195739746, "learning_rate": 3.951695502733917e-06, "loss": 1.285, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1188 }, { "epoch": 0.7459222082810539, "grad_norm": 2.0467886924743652, "learning_rate": 3.949466693731962e-06, "loss": 1.3222, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1189 }, { "epoch": 0.7465495608531995, "grad_norm": 1.7042464017868042, "learning_rate": 3.947236147952647e-06, "loss": 1.1682, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1190 }, { "epoch": 0.7471769134253451, "grad_norm": 1.9766619205474854, "learning_rate": 3.945003868068653e-06, "loss": 1.3811, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1191 }, { "epoch": 0.7478042659974906, "grad_norm": 1.95023512840271, "learning_rate": 3.942769856754739e-06, "loss": 1.0653, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1192 }, { "epoch": 0.7484316185696361, "grad_norm": 2.0074973106384277, "learning_rate": 3.940534116687737e-06, "loss": 1.3036, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1193 }, { "epoch": 0.7490589711417817, "grad_norm": 1.8049767017364502, "learning_rate": 3.938296650546552e-06, "loss": 1.4716, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1194 }, { "epoch": 0.7496863237139272, "grad_norm": 1.9003103971481323, "learning_rate": 3.936057461012157e-06, "loss": 1.3468, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1195 }, { "epoch": 0.7503136762860728, "grad_norm": 1.6638295650482178, "learning_rate": 3.9338165507675885e-06, "loss": 1.2144, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1196 }, { "epoch": 0.7509410288582183, "grad_norm": 1.6862947940826416, "learning_rate": 3.931573922497947e-06, "loss": 1.2948, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1197 }, { "epoch": 0.7515683814303639, "grad_norm": 1.7455106973648071, "learning_rate": 3.92932957889039e-06, "loss": 1.388, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1198 }, { "epoch": 0.7521957340025094, "grad_norm": 1.7992185354232788, "learning_rate": 3.927083522634132e-06, "loss": 1.1686, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1199 }, { "epoch": 0.7528230865746549, "grad_norm": 2.1805531978607178, "learning_rate": 3.9248357564204364e-06, "loss": 1.3465, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1200 }, { "epoch": 0.7534504391468005, "grad_norm": 1.8067539930343628, "learning_rate": 3.9225862829426184e-06, "loss": 1.3312, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1201 }, { "epoch": 0.7540777917189461, "grad_norm": 1.7938944101333618, "learning_rate": 3.920335104896039e-06, "loss": 1.2371, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1202 }, { "epoch": 0.7547051442910916, "grad_norm": 1.7084840536117554, "learning_rate": 3.918082224978099e-06, "loss": 1.3185, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1203 }, { "epoch": 0.7553324968632371, "grad_norm": 1.7908164262771606, "learning_rate": 3.915827645888242e-06, "loss": 1.2634, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1204 }, { "epoch": 0.7559598494353826, "grad_norm": 1.709354281425476, "learning_rate": 3.913571370327944e-06, "loss": 1.1975, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1205 }, { "epoch": 0.7565872020075283, "grad_norm": 1.792067050933838, "learning_rate": 3.911313401000716e-06, "loss": 1.3193, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1206 }, { "epoch": 0.7572145545796738, "grad_norm": 1.76042640209198, "learning_rate": 3.909053740612099e-06, "loss": 1.226, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1207 }, { "epoch": 0.7578419071518193, "grad_norm": 1.750500202178955, "learning_rate": 3.906792391869657e-06, "loss": 1.4268, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1208 }, { "epoch": 0.7584692597239648, "grad_norm": 1.4998160600662231, "learning_rate": 3.904529357482981e-06, "loss": 1.1727, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1209 }, { "epoch": 0.7590966122961104, "grad_norm": 2.04630970954895, "learning_rate": 3.90226464016368e-06, "loss": 1.373, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1210 }, { "epoch": 0.759723964868256, "grad_norm": 2.0300686359405518, "learning_rate": 3.899998242625378e-06, "loss": 1.1817, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1211 }, { "epoch": 0.7603513174404015, "grad_norm": 1.8236775398254395, "learning_rate": 3.897730167583716e-06, "loss": 1.2345, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1212 }, { "epoch": 0.760978670012547, "grad_norm": 2.1852262020111084, "learning_rate": 3.8954604177563395e-06, "loss": 1.3007, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1213 }, { "epoch": 0.7616060225846926, "grad_norm": 1.4265241622924805, "learning_rate": 3.893188995862907e-06, "loss": 1.0932, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1214 }, { "epoch": 0.7622333751568381, "grad_norm": 1.7335155010223389, "learning_rate": 3.890915904625075e-06, "loss": 1.3525, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1215 }, { "epoch": 0.7628607277289837, "grad_norm": 1.8855781555175781, "learning_rate": 3.888641146766506e-06, "loss": 1.2274, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1216 }, { "epoch": 0.7634880803011292, "grad_norm": 1.8744924068450928, "learning_rate": 3.886364725012854e-06, "loss": 1.2034, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1217 }, { "epoch": 0.7641154328732748, "grad_norm": 1.7175720930099487, "learning_rate": 3.884086642091769e-06, "loss": 1.2869, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1218 }, { "epoch": 0.7647427854454203, "grad_norm": 1.662237286567688, "learning_rate": 3.881806900732893e-06, "loss": 1.1438, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1219 }, { "epoch": 0.7653701380175659, "grad_norm": 1.7514503002166748, "learning_rate": 3.879525503667851e-06, "loss": 1.1766, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1220 }, { "epoch": 0.7659974905897114, "grad_norm": 1.7529418468475342, "learning_rate": 3.8772424536302565e-06, "loss": 1.2273, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1221 }, { "epoch": 0.766624843161857, "grad_norm": 1.989951252937317, "learning_rate": 3.874957753355701e-06, "loss": 1.1705, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1222 }, { "epoch": 0.7672521957340025, "grad_norm": 1.5698517560958862, "learning_rate": 3.872671405581754e-06, "loss": 1.2493, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1223 }, { "epoch": 0.767879548306148, "grad_norm": 1.5441415309906006, "learning_rate": 3.870383413047959e-06, "loss": 1.1989, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1224 }, { "epoch": 0.7685069008782937, "grad_norm": 1.7378580570220947, "learning_rate": 3.86809377849583e-06, "loss": 1.1907, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1225 }, { "epoch": 0.7691342534504392, "grad_norm": 1.8756630420684814, "learning_rate": 3.865802504668849e-06, "loss": 1.2261, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1226 }, { "epoch": 0.7697616060225847, "grad_norm": 1.6204801797866821, "learning_rate": 3.863509594312461e-06, "loss": 1.163, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1227 }, { "epoch": 0.7703889585947302, "grad_norm": 1.9416941404342651, "learning_rate": 3.861215050174074e-06, "loss": 1.2847, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1228 }, { "epoch": 0.7710163111668757, "grad_norm": 1.780923843383789, "learning_rate": 3.858918875003053e-06, "loss": 1.4637, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1229 }, { "epoch": 0.7716436637390214, "grad_norm": 1.8576253652572632, "learning_rate": 3.856621071550716e-06, "loss": 1.1524, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1230 }, { "epoch": 0.7722710163111669, "grad_norm": 2.069793939590454, "learning_rate": 3.854321642570334e-06, "loss": 1.4231, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1231 }, { "epoch": 0.7728983688833124, "grad_norm": 1.6411324739456177, "learning_rate": 3.852020590817123e-06, "loss": 1.2096, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1232 }, { "epoch": 0.773525721455458, "grad_norm": 1.8884642124176025, "learning_rate": 3.8497179190482445e-06, "loss": 1.3151, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1233 }, { "epoch": 0.7741530740276035, "grad_norm": 2.067143678665161, "learning_rate": 3.847413630022804e-06, "loss": 1.2766, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1234 }, { "epoch": 0.7747804265997491, "grad_norm": 1.7415361404418945, "learning_rate": 3.8451077265018426e-06, "loss": 1.285, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1235 }, { "epoch": 0.7754077791718946, "grad_norm": 1.9397696256637573, "learning_rate": 3.842800211248333e-06, "loss": 1.299, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1236 }, { "epoch": 0.7760351317440402, "grad_norm": 2.149726390838623, "learning_rate": 3.840491087027184e-06, "loss": 1.2434, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1237 }, { "epoch": 0.7766624843161857, "grad_norm": 1.3199256658554077, "learning_rate": 3.83818035660523e-06, "loss": 1.2262, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1238 }, { "epoch": 0.7772898368883312, "grad_norm": 1.7844839096069336, "learning_rate": 3.835868022751231e-06, "loss": 1.146, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1239 }, { "epoch": 0.7779171894604768, "grad_norm": 3.71295166015625, "learning_rate": 3.833554088235866e-06, "loss": 1.3146, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1240 }, { "epoch": 0.7785445420326224, "grad_norm": 1.7507476806640625, "learning_rate": 3.831238555831735e-06, "loss": 1.1682, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1241 }, { "epoch": 0.7791718946047679, "grad_norm": 1.502406120300293, "learning_rate": 3.828921428313349e-06, "loss": 1.2024, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1242 }, { "epoch": 0.7797992471769134, "grad_norm": 1.2211238145828247, "learning_rate": 3.8266027084571335e-06, "loss": 1.253, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1243 }, { "epoch": 0.7804265997490589, "grad_norm": 1.661982774734497, "learning_rate": 3.824282399041421e-06, "loss": 1.3408, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1244 }, { "epoch": 0.7810539523212046, "grad_norm": 1.6972383260726929, "learning_rate": 3.821960502846449e-06, "loss": 1.3019, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1245 }, { "epoch": 0.7816813048933501, "grad_norm": 1.9191818237304688, "learning_rate": 3.819637022654355e-06, "loss": 1.2167, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1246 }, { "epoch": 0.7823086574654956, "grad_norm": 1.999821424484253, "learning_rate": 3.817311961249175e-06, "loss": 1.3381, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1247 }, { "epoch": 0.7829360100376411, "grad_norm": 1.9394023418426514, "learning_rate": 3.814985321416839e-06, "loss": 1.271, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1248 }, { "epoch": 0.7835633626097867, "grad_norm": 1.752686619758606, "learning_rate": 3.812657105945171e-06, "loss": 1.1284, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1249 }, { "epoch": 0.7841907151819323, "grad_norm": 1.853484034538269, "learning_rate": 3.810327317623881e-06, "loss": 1.1259, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1250 }, { "epoch": 0.7848180677540778, "grad_norm": 1.6975935697555542, "learning_rate": 3.8079959592445626e-06, "loss": 1.2346, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1251 }, { "epoch": 0.7854454203262233, "grad_norm": 1.5964429378509521, "learning_rate": 3.8056630336006915e-06, "loss": 1.2542, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1252 }, { "epoch": 0.7860727728983689, "grad_norm": 1.616788387298584, "learning_rate": 3.8033285434876245e-06, "loss": 1.3044, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1253 }, { "epoch": 0.7867001254705144, "grad_norm": 1.6051348447799683, "learning_rate": 3.8009924917025864e-06, "loss": 1.1955, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1254 }, { "epoch": 0.78732747804266, "grad_norm": 1.761275053024292, "learning_rate": 3.7986548810446796e-06, "loss": 1.2944, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1255 }, { "epoch": 0.7879548306148055, "grad_norm": 1.8812828063964844, "learning_rate": 3.7963157143148705e-06, "loss": 1.1681, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1256 }, { "epoch": 0.7885821831869511, "grad_norm": 1.7038171291351318, "learning_rate": 3.793974994315991e-06, "loss": 1.3871, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1257 }, { "epoch": 0.7892095357590966, "grad_norm": 2.0471980571746826, "learning_rate": 3.791632723852736e-06, "loss": 1.288, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1258 }, { "epoch": 0.7898368883312421, "grad_norm": 1.5858546495437622, "learning_rate": 3.789288905731655e-06, "loss": 1.2338, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1259 }, { "epoch": 0.7904642409033877, "grad_norm": 1.7153313159942627, "learning_rate": 3.7869435427611547e-06, "loss": 1.299, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1260 }, { "epoch": 0.7910915934755333, "grad_norm": 1.87894606590271, "learning_rate": 3.78459663775149e-06, "loss": 1.2089, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1261 }, { "epoch": 0.7917189460476788, "grad_norm": 2.0329928398132324, "learning_rate": 3.782248193514766e-06, "loss": 1.3064, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1262 }, { "epoch": 0.7923462986198243, "grad_norm": 2.049156427383423, "learning_rate": 3.7798982128649315e-06, "loss": 1.1991, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1263 }, { "epoch": 0.7929736511919699, "grad_norm": 1.8942718505859375, "learning_rate": 3.7775466986177763e-06, "loss": 1.2944, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1264 }, { "epoch": 0.7936010037641155, "grad_norm": 1.7939801216125488, "learning_rate": 3.775193653590927e-06, "loss": 1.3067, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1265 }, { "epoch": 0.794228356336261, "grad_norm": 1.5614609718322754, "learning_rate": 3.7728390806038456e-06, "loss": 1.1798, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1266 }, { "epoch": 0.7948557089084065, "grad_norm": 1.604327917098999, "learning_rate": 3.7704829824778245e-06, "loss": 1.166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1267 }, { "epoch": 0.795483061480552, "grad_norm": 1.6718720197677612, "learning_rate": 3.7681253620359814e-06, "loss": 1.1947, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1268 }, { "epoch": 0.7961104140526977, "grad_norm": 1.7396267652511597, "learning_rate": 3.765766222103262e-06, "loss": 1.2189, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1269 }, { "epoch": 0.7967377666248432, "grad_norm": 1.5587661266326904, "learning_rate": 3.7634055655064296e-06, "loss": 1.3867, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1270 }, { "epoch": 0.7973651191969887, "grad_norm": 2.117201328277588, "learning_rate": 3.7610433950740667e-06, "loss": 1.2159, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1271 }, { "epoch": 0.7979924717691342, "grad_norm": 1.485001564025879, "learning_rate": 3.7586797136365682e-06, "loss": 1.278, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1272 }, { "epoch": 0.7986198243412798, "grad_norm": 1.8766347169876099, "learning_rate": 3.756314524026141e-06, "loss": 1.3496, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1273 }, { "epoch": 0.7992471769134254, "grad_norm": 1.7875092029571533, "learning_rate": 3.753947829076797e-06, "loss": 1.2092, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1274 }, { "epoch": 0.7998745294855709, "grad_norm": 1.8150584697723389, "learning_rate": 3.751579631624355e-06, "loss": 1.217, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1275 }, { "epoch": 0.8005018820577164, "grad_norm": 1.8528504371643066, "learning_rate": 3.74920993450643e-06, "loss": 1.3494, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1276 }, { "epoch": 0.801129234629862, "grad_norm": 1.870395541191101, "learning_rate": 3.7468387405624373e-06, "loss": 1.1856, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1277 }, { "epoch": 0.8017565872020075, "grad_norm": 1.8406163454055786, "learning_rate": 3.7444660526335853e-06, "loss": 1.1518, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1278 }, { "epoch": 0.8023839397741531, "grad_norm": 1.3434467315673828, "learning_rate": 3.7420918735628714e-06, "loss": 1.1693, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1279 }, { "epoch": 0.8030112923462986, "grad_norm": 1.5686734914779663, "learning_rate": 3.73971620619508e-06, "loss": 1.2969, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1280 }, { "epoch": 0.8036386449184442, "grad_norm": 1.5028196573257446, "learning_rate": 3.7373390533767783e-06, "loss": 1.2124, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1281 }, { "epoch": 0.8042659974905897, "grad_norm": 1.6024171113967896, "learning_rate": 3.7349604179563156e-06, "loss": 1.2056, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1282 }, { "epoch": 0.8048933500627352, "grad_norm": 1.773492455482483, "learning_rate": 3.7325803027838146e-06, "loss": 1.243, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1283 }, { "epoch": 0.8055207026348808, "grad_norm": 1.4282128810882568, "learning_rate": 3.730198710711173e-06, "loss": 1.2822, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1284 }, { "epoch": 0.8061480552070264, "grad_norm": 1.796323537826538, "learning_rate": 3.7278156445920584e-06, "loss": 1.3409, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1285 }, { "epoch": 0.8067754077791719, "grad_norm": 2.14421010017395, "learning_rate": 3.725431107281904e-06, "loss": 1.267, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1286 }, { "epoch": 0.8074027603513174, "grad_norm": 1.5279842615127563, "learning_rate": 3.7230451016379045e-06, "loss": 1.1884, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1287 }, { "epoch": 0.8080301129234629, "grad_norm": 1.8912734985351562, "learning_rate": 3.720657630519016e-06, "loss": 1.2774, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1288 }, { "epoch": 0.8086574654956086, "grad_norm": 1.9765260219573975, "learning_rate": 3.71826869678595e-06, "loss": 1.3237, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1289 }, { "epoch": 0.8092848180677541, "grad_norm": 1.8362704515457153, "learning_rate": 3.7158783033011702e-06, "loss": 1.1868, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1290 }, { "epoch": 0.8099121706398996, "grad_norm": 1.8150081634521484, "learning_rate": 3.7134864529288907e-06, "loss": 1.3989, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1291 }, { "epoch": 0.8105395232120451, "grad_norm": 1.8795225620269775, "learning_rate": 3.711093148535068e-06, "loss": 1.3224, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1292 }, { "epoch": 0.8111668757841907, "grad_norm": 2.3372652530670166, "learning_rate": 3.7086983929874044e-06, "loss": 1.3099, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1293 }, { "epoch": 0.8117942283563363, "grad_norm": 1.8464175462722778, "learning_rate": 3.7063021891553384e-06, "loss": 1.1561, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1294 }, { "epoch": 0.8124215809284818, "grad_norm": 1.796033263206482, "learning_rate": 3.703904539910047e-06, "loss": 1.4166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1295 }, { "epoch": 0.8130489335006273, "grad_norm": 2.128056049346924, "learning_rate": 3.701505448124435e-06, "loss": 1.3366, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1296 }, { "epoch": 0.8136762860727729, "grad_norm": 2.16898512840271, "learning_rate": 3.6991049166731387e-06, "loss": 1.3019, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1297 }, { "epoch": 0.8143036386449184, "grad_norm": 1.7204794883728027, "learning_rate": 3.696702948432519e-06, "loss": 1.1802, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1298 }, { "epoch": 0.814930991217064, "grad_norm": 1.7379305362701416, "learning_rate": 3.6942995462806574e-06, "loss": 1.1881, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1299 }, { "epoch": 0.8155583437892095, "grad_norm": 2.0231478214263916, "learning_rate": 3.691894713097355e-06, "loss": 1.3784, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1300 }, { "epoch": 0.8161856963613551, "grad_norm": 1.7545968294143677, "learning_rate": 3.689488451764125e-06, "loss": 1.3519, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1301 }, { "epoch": 0.8168130489335006, "grad_norm": 1.6912481784820557, "learning_rate": 3.687080765164194e-06, "loss": 1.3577, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1302 }, { "epoch": 0.8174404015056461, "grad_norm": 1.7708526849746704, "learning_rate": 3.684671656182497e-06, "loss": 1.2427, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1303 }, { "epoch": 0.8180677540777918, "grad_norm": 1.7793676853179932, "learning_rate": 3.682261127705671e-06, "loss": 1.3287, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1304 }, { "epoch": 0.8186951066499373, "grad_norm": 1.8184021711349487, "learning_rate": 3.679849182622056e-06, "loss": 1.3435, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1305 }, { "epoch": 0.8193224592220828, "grad_norm": 1.8825409412384033, "learning_rate": 3.6774358238216878e-06, "loss": 1.3772, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1306 }, { "epoch": 0.8199498117942283, "grad_norm": 1.8324886560440063, "learning_rate": 3.6750210541962972e-06, "loss": 1.3492, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1307 }, { "epoch": 0.820577164366374, "grad_norm": 1.7730599641799927, "learning_rate": 3.6726048766393046e-06, "loss": 1.3669, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1308 }, { "epoch": 0.8212045169385195, "grad_norm": 1.756497859954834, "learning_rate": 3.670187294045819e-06, "loss": 1.4286, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1309 }, { "epoch": 0.821831869510665, "grad_norm": 1.1420440673828125, "learning_rate": 3.6677683093126297e-06, "loss": 1.1475, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1310 }, { "epoch": 0.8224592220828105, "grad_norm": 1.56114661693573, "learning_rate": 3.665347925338211e-06, "loss": 1.333, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1311 }, { "epoch": 0.823086574654956, "grad_norm": 1.8046437501907349, "learning_rate": 3.662926145022708e-06, "loss": 1.2138, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1312 }, { "epoch": 0.8237139272271017, "grad_norm": 1.684722900390625, "learning_rate": 3.660502971267945e-06, "loss": 1.254, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1313 }, { "epoch": 0.8243412797992472, "grad_norm": 1.9925870895385742, "learning_rate": 3.6580784069774104e-06, "loss": 1.2882, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1314 }, { "epoch": 0.8249686323713927, "grad_norm": 1.7439500093460083, "learning_rate": 3.655652455056263e-06, "loss": 1.1997, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1315 }, { "epoch": 0.8255959849435383, "grad_norm": 1.1201549768447876, "learning_rate": 3.653225118411321e-06, "loss": 1.2117, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1316 }, { "epoch": 0.8262233375156838, "grad_norm": 1.7333301305770874, "learning_rate": 3.6507963999510647e-06, "loss": 1.2691, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1317 }, { "epoch": 0.8268506900878294, "grad_norm": 1.7772564888000488, "learning_rate": 3.6483663025856276e-06, "loss": 1.3077, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1318 }, { "epoch": 0.8274780426599749, "grad_norm": 1.8323562145233154, "learning_rate": 3.645934829226797e-06, "loss": 1.3552, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1319 }, { "epoch": 0.8281053952321205, "grad_norm": 1.7378582954406738, "learning_rate": 3.6435019827880093e-06, "loss": 1.2976, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1320 }, { "epoch": 0.828732747804266, "grad_norm": 1.983419418334961, "learning_rate": 3.641067766184344e-06, "loss": 1.3097, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1321 }, { "epoch": 0.8293601003764115, "grad_norm": 2.4208033084869385, "learning_rate": 3.6386321823325242e-06, "loss": 1.1557, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1322 }, { "epoch": 0.8299874529485571, "grad_norm": 1.7591361999511719, "learning_rate": 3.636195234150911e-06, "loss": 1.3991, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1323 }, { "epoch": 0.8306148055207027, "grad_norm": 1.894167423248291, "learning_rate": 3.6337569245595007e-06, "loss": 1.2257, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1324 }, { "epoch": 0.8312421580928482, "grad_norm": 1.8774477243423462, "learning_rate": 3.6313172564799193e-06, "loss": 1.2091, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1325 }, { "epoch": 0.8318695106649937, "grad_norm": 1.8875328302383423, "learning_rate": 3.628876232835421e-06, "loss": 1.3331, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1326 }, { "epoch": 0.8324968632371392, "grad_norm": 1.873525857925415, "learning_rate": 3.626433856550886e-06, "loss": 1.2277, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1327 }, { "epoch": 0.8331242158092849, "grad_norm": 1.8272095918655396, "learning_rate": 3.623990130552813e-06, "loss": 1.3521, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1328 }, { "epoch": 0.8337515683814304, "grad_norm": 2.8294026851654053, "learning_rate": 3.6215450577693196e-06, "loss": 1.2214, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1329 }, { "epoch": 0.8343789209535759, "grad_norm": 1.6993393898010254, "learning_rate": 3.6190986411301354e-06, "loss": 1.1745, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1330 }, { "epoch": 0.8350062735257214, "grad_norm": 1.560639500617981, "learning_rate": 3.6166508835666026e-06, "loss": 1.2045, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1331 }, { "epoch": 0.835633626097867, "grad_norm": 1.559907078742981, "learning_rate": 3.6142017880116685e-06, "loss": 1.1456, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1332 }, { "epoch": 0.8362609786700126, "grad_norm": 1.6818002462387085, "learning_rate": 3.611751357399884e-06, "loss": 1.1618, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1333 }, { "epoch": 0.8368883312421581, "grad_norm": 1.8955743312835693, "learning_rate": 3.6092995946673996e-06, "loss": 1.291, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1334 }, { "epoch": 0.8375156838143036, "grad_norm": 1.8564447164535522, "learning_rate": 3.606846502751962e-06, "loss": 1.3731, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1335 }, { "epoch": 0.8381430363864492, "grad_norm": 1.6093361377716064, "learning_rate": 3.6043920845929093e-06, "loss": 1.1482, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1336 }, { "epoch": 0.8387703889585947, "grad_norm": 1.5696303844451904, "learning_rate": 3.6019363431311715e-06, "loss": 1.2315, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1337 }, { "epoch": 0.8393977415307403, "grad_norm": 1.8272252082824707, "learning_rate": 3.599479281309263e-06, "loss": 1.2891, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1338 }, { "epoch": 0.8400250941028858, "grad_norm": 1.5957363843917847, "learning_rate": 3.597020902071278e-06, "loss": 1.2716, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1339 }, { "epoch": 0.8406524466750314, "grad_norm": 2.0031421184539795, "learning_rate": 3.594561208362894e-06, "loss": 1.1314, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1340 }, { "epoch": 0.8412797992471769, "grad_norm": 1.8888684511184692, "learning_rate": 3.5921002031313586e-06, "loss": 1.3124, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1341 }, { "epoch": 0.8419071518193224, "grad_norm": 1.83388352394104, "learning_rate": 3.5896378893254936e-06, "loss": 1.2654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1342 }, { "epoch": 0.842534504391468, "grad_norm": 1.7649545669555664, "learning_rate": 3.587174269895688e-06, "loss": 1.2907, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1343 }, { "epoch": 0.8431618569636136, "grad_norm": 1.8308320045471191, "learning_rate": 3.5847093477938955e-06, "loss": 1.2235, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1344 }, { "epoch": 0.8437892095357591, "grad_norm": 1.8020336627960205, "learning_rate": 3.582243125973631e-06, "loss": 1.2072, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1345 }, { "epoch": 0.8444165621079046, "grad_norm": 2.188171148300171, "learning_rate": 3.5797756073899665e-06, "loss": 1.2954, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1346 }, { "epoch": 0.8450439146800501, "grad_norm": 2.0063283443450928, "learning_rate": 3.577306794999527e-06, "loss": 1.2451, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1347 }, { "epoch": 0.8456712672521958, "grad_norm": 1.9352948665618896, "learning_rate": 3.574836691760489e-06, "loss": 1.2155, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1348 }, { "epoch": 0.8462986198243413, "grad_norm": 1.7398347854614258, "learning_rate": 3.572365300632574e-06, "loss": 1.338, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1349 }, { "epoch": 0.8469259723964868, "grad_norm": 1.6711233854293823, "learning_rate": 3.5698926245770495e-06, "loss": 1.4667, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1350 }, { "epoch": 0.8475533249686323, "grad_norm": 1.8236483335494995, "learning_rate": 3.5674186665567197e-06, "loss": 1.379, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1351 }, { "epoch": 0.848180677540778, "grad_norm": 1.9310721158981323, "learning_rate": 3.5649434295359265e-06, "loss": 1.1314, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1352 }, { "epoch": 0.8488080301129235, "grad_norm": 1.820559024810791, "learning_rate": 3.562466916480544e-06, "loss": 1.3131, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1353 }, { "epoch": 0.849435382685069, "grad_norm": 1.7749238014221191, "learning_rate": 3.5599891303579747e-06, "loss": 1.3218, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1354 }, { "epoch": 0.8500627352572145, "grad_norm": 1.6481010913848877, "learning_rate": 3.557510074137147e-06, "loss": 1.1834, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1355 }, { "epoch": 0.8506900878293601, "grad_norm": 1.7191163301467896, "learning_rate": 3.5550297507885123e-06, "loss": 1.3883, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1356 }, { "epoch": 0.8513174404015057, "grad_norm": 1.464750051498413, "learning_rate": 3.552548163284038e-06, "loss": 1.2309, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1357 }, { "epoch": 0.8519447929736512, "grad_norm": 1.6320300102233887, "learning_rate": 3.5500653145972085e-06, "loss": 1.2576, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1358 }, { "epoch": 0.8525721455457967, "grad_norm": 1.6275529861450195, "learning_rate": 3.547581207703017e-06, "loss": 1.1981, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1359 }, { "epoch": 0.8531994981179423, "grad_norm": 1.5294578075408936, "learning_rate": 3.545095845577967e-06, "loss": 1.2997, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1360 }, { "epoch": 0.8538268506900878, "grad_norm": 1.5798944234848022, "learning_rate": 3.5426092312000627e-06, "loss": 1.3639, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1361 }, { "epoch": 0.8544542032622334, "grad_norm": 1.7874319553375244, "learning_rate": 3.540121367548811e-06, "loss": 1.3735, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1362 }, { "epoch": 0.855081555834379, "grad_norm": 1.8202686309814453, "learning_rate": 3.5376322576052176e-06, "loss": 1.235, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1363 }, { "epoch": 0.8557089084065245, "grad_norm": 1.403754711151123, "learning_rate": 3.5351419043517764e-06, "loss": 1.14, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1364 }, { "epoch": 0.85633626097867, "grad_norm": 1.901134967803955, "learning_rate": 3.5326503107724763e-06, "loss": 1.2498, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1365 }, { "epoch": 0.8569636135508155, "grad_norm": 1.5691863298416138, "learning_rate": 3.5301574798527895e-06, "loss": 1.3065, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1366 }, { "epoch": 0.8575909661229612, "grad_norm": 1.2738248109817505, "learning_rate": 3.5276634145796706e-06, "loss": 1.1509, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1367 }, { "epoch": 0.8582183186951067, "grad_norm": 1.6212133169174194, "learning_rate": 3.525168117941554e-06, "loss": 1.2146, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1368 }, { "epoch": 0.8588456712672522, "grad_norm": 1.5654613971710205, "learning_rate": 3.5226715929283507e-06, "loss": 1.2141, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1369 }, { "epoch": 0.8594730238393977, "grad_norm": 1.9199938774108887, "learning_rate": 3.5201738425314403e-06, "loss": 1.1485, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1370 }, { "epoch": 0.8601003764115432, "grad_norm": 1.919198751449585, "learning_rate": 3.5176748697436746e-06, "loss": 1.3639, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1371 }, { "epoch": 0.8607277289836889, "grad_norm": 1.6670496463775635, "learning_rate": 3.5151746775593676e-06, "loss": 1.0675, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1372 }, { "epoch": 0.8613550815558344, "grad_norm": 1.9802881479263306, "learning_rate": 3.5126732689742953e-06, "loss": 1.2979, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1373 }, { "epoch": 0.8619824341279799, "grad_norm": 1.9427763223648071, "learning_rate": 3.5101706469856913e-06, "loss": 1.2531, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1374 }, { "epoch": 0.8626097867001254, "grad_norm": 1.8517464399337769, "learning_rate": 3.507666814592242e-06, "loss": 1.3706, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1375 }, { "epoch": 0.863237139272271, "grad_norm": 1.6945019960403442, "learning_rate": 3.505161774794085e-06, "loss": 1.2589, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1376 }, { "epoch": 0.8638644918444166, "grad_norm": 1.1320945024490356, "learning_rate": 3.5026555305928047e-06, "loss": 1.1627, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1377 }, { "epoch": 0.8644918444165621, "grad_norm": 1.643018364906311, "learning_rate": 3.500148084991428e-06, "loss": 1.2658, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1378 }, { "epoch": 0.8651191969887077, "grad_norm": 1.681472659111023, "learning_rate": 3.4976394409944236e-06, "loss": 1.3512, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1379 }, { "epoch": 0.8657465495608532, "grad_norm": 1.6602472066879272, "learning_rate": 3.495129601607692e-06, "loss": 1.3452, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1380 }, { "epoch": 0.8663739021329987, "grad_norm": 1.923701286315918, "learning_rate": 3.492618569838571e-06, "loss": 1.3233, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1381 }, { "epoch": 0.8670012547051443, "grad_norm": 1.7094676494598389, "learning_rate": 3.490106348695822e-06, "loss": 1.264, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1382 }, { "epoch": 0.8676286072772899, "grad_norm": 1.6848981380462646, "learning_rate": 3.487592941189636e-06, "loss": 1.1531, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1383 }, { "epoch": 0.8682559598494354, "grad_norm": 1.67879319190979, "learning_rate": 3.485078350331622e-06, "loss": 1.1951, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1384 }, { "epoch": 0.8688833124215809, "grad_norm": 1.8846187591552734, "learning_rate": 3.4825625791348093e-06, "loss": 1.3897, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1385 }, { "epoch": 0.8695106649937264, "grad_norm": 1.9970777034759521, "learning_rate": 3.480045630613641e-06, "loss": 1.3452, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1386 }, { "epoch": 0.8701380175658721, "grad_norm": 1.727866768836975, "learning_rate": 3.477527507783971e-06, "loss": 1.2737, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1387 }, { "epoch": 0.8707653701380176, "grad_norm": 2.935303211212158, "learning_rate": 3.475008213663058e-06, "loss": 1.1305, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1388 }, { "epoch": 0.8713927227101631, "grad_norm": 1.5516635179519653, "learning_rate": 3.4724877512695677e-06, "loss": 1.2315, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1389 }, { "epoch": 0.8720200752823086, "grad_norm": 1.6491432189941406, "learning_rate": 3.469966123623563e-06, "loss": 1.2025, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1390 }, { "epoch": 0.8726474278544541, "grad_norm": 1.6983126401901245, "learning_rate": 3.467443333746506e-06, "loss": 1.4585, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1391 }, { "epoch": 0.8732747804265998, "grad_norm": 2.029611110687256, "learning_rate": 3.4649193846612465e-06, "loss": 1.283, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1392 }, { "epoch": 0.8739021329987453, "grad_norm": 1.6874094009399414, "learning_rate": 3.4623942793920286e-06, "loss": 1.3645, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1393 }, { "epoch": 0.8745294855708908, "grad_norm": 1.797200083732605, "learning_rate": 3.459868020964478e-06, "loss": 1.1996, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1394 }, { "epoch": 0.8751568381430364, "grad_norm": 1.703599452972412, "learning_rate": 3.457340612405604e-06, "loss": 1.3993, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1395 }, { "epoch": 0.875784190715182, "grad_norm": 1.7407021522521973, "learning_rate": 3.4548120567437932e-06, "loss": 1.1895, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1396 }, { "epoch": 0.8764115432873275, "grad_norm": 1.3256012201309204, "learning_rate": 3.4522823570088073e-06, "loss": 1.1549, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1397 }, { "epoch": 0.877038895859473, "grad_norm": 2.297656774520874, "learning_rate": 3.4497515162317786e-06, "loss": 1.3253, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1398 }, { "epoch": 0.8776662484316186, "grad_norm": 1.6978338956832886, "learning_rate": 3.4472195374452067e-06, "loss": 1.2676, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1399 }, { "epoch": 0.8782936010037641, "grad_norm": 1.733985424041748, "learning_rate": 3.444686423682954e-06, "loss": 1.197, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1400 }, { "epoch": 0.8789209535759097, "grad_norm": 2.119873285293579, "learning_rate": 3.442152177980245e-06, "loss": 1.1963, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1401 }, { "epoch": 0.8795483061480552, "grad_norm": 1.4531352519989014, "learning_rate": 3.4396168033736564e-06, "loss": 1.1837, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1402 }, { "epoch": 0.8801756587202008, "grad_norm": 1.9602431058883667, "learning_rate": 3.437080302901122e-06, "loss": 1.2665, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1403 }, { "epoch": 0.8808030112923463, "grad_norm": 1.8748997449874878, "learning_rate": 3.434542679601922e-06, "loss": 1.2853, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1404 }, { "epoch": 0.8814303638644918, "grad_norm": 1.863858938217163, "learning_rate": 3.432003936516684e-06, "loss": 1.2804, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1405 }, { "epoch": 0.8820577164366374, "grad_norm": 1.690569519996643, "learning_rate": 3.429464076687375e-06, "loss": 1.3098, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1406 }, { "epoch": 0.882685069008783, "grad_norm": 1.9281234741210938, "learning_rate": 3.4269231031573023e-06, "loss": 1.2294, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1407 }, { "epoch": 0.8833124215809285, "grad_norm": 1.8205302953720093, "learning_rate": 3.4243810189711052e-06, "loss": 1.3322, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1408 }, { "epoch": 0.883939774153074, "grad_norm": 1.8361729383468628, "learning_rate": 3.4218378271747566e-06, "loss": 1.1492, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1409 }, { "epoch": 0.8845671267252195, "grad_norm": 1.9009759426116943, "learning_rate": 3.419293530815554e-06, "loss": 1.2921, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1410 }, { "epoch": 0.8851944792973652, "grad_norm": 1.9854016304016113, "learning_rate": 3.4167481329421204e-06, "loss": 1.2964, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1411 }, { "epoch": 0.8858218318695107, "grad_norm": 2.017411470413208, "learning_rate": 3.4142016366043973e-06, "loss": 1.323, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1412 }, { "epoch": 0.8864491844416562, "grad_norm": 1.6297651529312134, "learning_rate": 3.4116540448536432e-06, "loss": 1.1715, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1413 }, { "epoch": 0.8870765370138017, "grad_norm": 1.628180742263794, "learning_rate": 3.4091053607424295e-06, "loss": 1.2592, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1414 }, { "epoch": 0.8877038895859473, "grad_norm": 1.8852691650390625, "learning_rate": 3.4065555873246348e-06, "loss": 1.2048, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1415 }, { "epoch": 0.8883312421580929, "grad_norm": 2.035797357559204, "learning_rate": 3.404004727655444e-06, "loss": 1.3187, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1416 }, { "epoch": 0.8889585947302384, "grad_norm": 1.75394606590271, "learning_rate": 3.401452784791345e-06, "loss": 1.1598, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1417 }, { "epoch": 0.8895859473023839, "grad_norm": 1.621436595916748, "learning_rate": 3.39889976179012e-06, "loss": 1.2028, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1418 }, { "epoch": 0.8902132998745295, "grad_norm": 2.014482021331787, "learning_rate": 3.396345661710849e-06, "loss": 1.3148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1419 }, { "epoch": 0.890840652446675, "grad_norm": 1.7939558029174805, "learning_rate": 3.393790487613901e-06, "loss": 1.2393, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1420 }, { "epoch": 0.8914680050188206, "grad_norm": 1.7533239126205444, "learning_rate": 3.3912342425609325e-06, "loss": 1.246, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1421 }, { "epoch": 0.8920953575909661, "grad_norm": 1.6140942573547363, "learning_rate": 3.388676929614882e-06, "loss": 1.2078, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1422 }, { "epoch": 0.8927227101631117, "grad_norm": 1.6957402229309082, "learning_rate": 3.386118551839969e-06, "loss": 1.2466, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1423 }, { "epoch": 0.8933500627352572, "grad_norm": 1.8988665342330933, "learning_rate": 3.383559112301687e-06, "loss": 1.2817, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1424 }, { "epoch": 0.8939774153074027, "grad_norm": 2.0279648303985596, "learning_rate": 3.380998614066805e-06, "loss": 1.3186, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1425 }, { "epoch": 0.8946047678795483, "grad_norm": 1.6259845495224, "learning_rate": 3.3784370602033572e-06, "loss": 1.1776, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1426 }, { "epoch": 0.8952321204516939, "grad_norm": 1.4232373237609863, "learning_rate": 3.3758744537806443e-06, "loss": 1.1832, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1427 }, { "epoch": 0.8958594730238394, "grad_norm": 1.6897512674331665, "learning_rate": 3.3733107978692287e-06, "loss": 1.1143, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1428 }, { "epoch": 0.8964868255959849, "grad_norm": 1.6576286554336548, "learning_rate": 3.370746095540928e-06, "loss": 1.3167, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1429 }, { "epoch": 0.8971141781681304, "grad_norm": 1.9939161539077759, "learning_rate": 3.3681803498688165e-06, "loss": 1.3256, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1430 }, { "epoch": 0.8977415307402761, "grad_norm": 1.6561397314071655, "learning_rate": 3.365613563927217e-06, "loss": 1.4733, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1431 }, { "epoch": 0.8983688833124216, "grad_norm": 1.4825150966644287, "learning_rate": 3.363045740791698e-06, "loss": 1.1664, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1432 }, { "epoch": 0.8989962358845671, "grad_norm": 1.7675479650497437, "learning_rate": 3.3604768835390746e-06, "loss": 1.2684, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1433 }, { "epoch": 0.8996235884567126, "grad_norm": 1.8060680627822876, "learning_rate": 3.3579069952473964e-06, "loss": 1.4176, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1434 }, { "epoch": 0.9002509410288582, "grad_norm": 2.16556978225708, "learning_rate": 3.35533607899595e-06, "loss": 1.222, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1435 }, { "epoch": 0.9008782936010038, "grad_norm": 1.771340250968933, "learning_rate": 3.352764137865254e-06, "loss": 1.2995, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1436 }, { "epoch": 0.9015056461731493, "grad_norm": 2.0096311569213867, "learning_rate": 3.3501911749370557e-06, "loss": 1.1834, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1437 }, { "epoch": 0.9021329987452948, "grad_norm": 1.8969783782958984, "learning_rate": 3.3476171932943265e-06, "loss": 1.2817, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1438 }, { "epoch": 0.9027603513174404, "grad_norm": 1.6149653196334839, "learning_rate": 3.345042196021257e-06, "loss": 1.2378, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1439 }, { "epoch": 0.903387703889586, "grad_norm": 1.2029558420181274, "learning_rate": 3.3424661862032563e-06, "loss": 1.2035, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1440 }, { "epoch": 0.9040150564617315, "grad_norm": 1.5148495435714722, "learning_rate": 3.3398891669269466e-06, "loss": 1.2292, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1441 }, { "epoch": 0.904642409033877, "grad_norm": 1.7740296125411987, "learning_rate": 3.3373111412801594e-06, "loss": 1.3184, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1442 }, { "epoch": 0.9052697616060226, "grad_norm": 1.9513882398605347, "learning_rate": 3.3347321123519304e-06, "loss": 1.2391, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1443 }, { "epoch": 0.9058971141781681, "grad_norm": 1.9446839094161987, "learning_rate": 3.3321520832325e-06, "loss": 1.2979, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1444 }, { "epoch": 0.9065244667503137, "grad_norm": 1.897504448890686, "learning_rate": 3.3295710570133063e-06, "loss": 1.4606, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1445 }, { "epoch": 0.9071518193224593, "grad_norm": 1.7566696405410767, "learning_rate": 3.326989036786981e-06, "loss": 1.3677, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1446 }, { "epoch": 0.9077791718946048, "grad_norm": 1.8443048000335693, "learning_rate": 3.3244060256473488e-06, "loss": 1.2227, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1447 }, { "epoch": 0.9084065244667503, "grad_norm": 2.029710292816162, "learning_rate": 3.32182202668942e-06, "loss": 1.2015, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1448 }, { "epoch": 0.9090338770388958, "grad_norm": 1.9194732904434204, "learning_rate": 3.319237043009389e-06, "loss": 1.2434, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1449 }, { "epoch": 0.9096612296110415, "grad_norm": 1.645271897315979, "learning_rate": 3.3166510777046313e-06, "loss": 1.2735, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1450 }, { "epoch": 0.910288582183187, "grad_norm": 1.6370049715042114, "learning_rate": 3.314064133873696e-06, "loss": 1.1844, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1451 }, { "epoch": 0.9109159347553325, "grad_norm": 1.9140526056289673, "learning_rate": 3.311476214616307e-06, "loss": 1.2394, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1452 }, { "epoch": 0.911543287327478, "grad_norm": 1.867573857307434, "learning_rate": 3.3088873230333562e-06, "loss": 1.2614, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1453 }, { "epoch": 0.9121706398996235, "grad_norm": 1.5740164518356323, "learning_rate": 3.3062974622269006e-06, "loss": 1.3641, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1454 }, { "epoch": 0.9127979924717692, "grad_norm": 1.985310673713684, "learning_rate": 3.3037066353001595e-06, "loss": 1.2517, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1455 }, { "epoch": 0.9134253450439147, "grad_norm": 1.9480928182601929, "learning_rate": 3.301114845357507e-06, "loss": 1.2349, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1456 }, { "epoch": 0.9140526976160602, "grad_norm": 1.7110068798065186, "learning_rate": 3.2985220955044742e-06, "loss": 1.3385, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1457 }, { "epoch": 0.9146800501882058, "grad_norm": 1.584576964378357, "learning_rate": 3.2959283888477416e-06, "loss": 1.1978, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1458 }, { "epoch": 0.9153074027603513, "grad_norm": 1.6054190397262573, "learning_rate": 3.2933337284951338e-06, "loss": 1.4712, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1459 }, { "epoch": 0.9159347553324969, "grad_norm": 1.9523940086364746, "learning_rate": 3.290738117555622e-06, "loss": 1.3496, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1460 }, { "epoch": 0.9165621079046424, "grad_norm": 1.800445318222046, "learning_rate": 3.2881415591393128e-06, "loss": 1.3048, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1461 }, { "epoch": 0.917189460476788, "grad_norm": 1.6035422086715698, "learning_rate": 3.2855440563574516e-06, "loss": 1.4824, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1462 }, { "epoch": 0.9178168130489335, "grad_norm": 2.0005290508270264, "learning_rate": 3.2829456123224115e-06, "loss": 1.2858, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1463 }, { "epoch": 0.918444165621079, "grad_norm": 1.987656593322754, "learning_rate": 3.2803462301476962e-06, "loss": 1.3675, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1464 }, { "epoch": 0.9190715181932246, "grad_norm": 1.3656636476516724, "learning_rate": 3.277745912947933e-06, "loss": 1.1349, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1465 }, { "epoch": 0.9196988707653702, "grad_norm": 1.7897242307662964, "learning_rate": 3.2751446638388694e-06, "loss": 1.3293, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1466 }, { "epoch": 0.9203262233375157, "grad_norm": 1.4551517963409424, "learning_rate": 3.272542485937369e-06, "loss": 1.2862, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1467 }, { "epoch": 0.9209535759096612, "grad_norm": 1.6863459348678589, "learning_rate": 3.2699393823614088e-06, "loss": 1.2697, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1468 }, { "epoch": 0.9215809284818067, "grad_norm": 1.777715802192688, "learning_rate": 3.2673353562300753e-06, "loss": 1.1703, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1469 }, { "epoch": 0.9222082810539524, "grad_norm": 1.701059103012085, "learning_rate": 3.2647304106635587e-06, "loss": 1.2239, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1470 }, { "epoch": 0.9228356336260979, "grad_norm": 2.0280165672302246, "learning_rate": 3.2621245487831533e-06, "loss": 1.3686, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1471 }, { "epoch": 0.9234629861982434, "grad_norm": 1.660024881362915, "learning_rate": 3.25951777371125e-06, "loss": 1.3431, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1472 }, { "epoch": 0.9240903387703889, "grad_norm": 1.6660288572311401, "learning_rate": 3.2569100885713333e-06, "loss": 1.5176, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1473 }, { "epoch": 0.9247176913425345, "grad_norm": 1.8647363185882568, "learning_rate": 3.2543014964879814e-06, "loss": 1.2584, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1474 }, { "epoch": 0.9253450439146801, "grad_norm": 1.8955433368682861, "learning_rate": 3.2516920005868536e-06, "loss": 1.1977, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1475 }, { "epoch": 0.9259723964868256, "grad_norm": 1.7556467056274414, "learning_rate": 3.249081603994698e-06, "loss": 1.3804, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1476 }, { "epoch": 0.9265997490589711, "grad_norm": 1.8404650688171387, "learning_rate": 3.2464703098393376e-06, "loss": 1.1758, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1477 }, { "epoch": 0.9272271016311167, "grad_norm": 1.8023796081542969, "learning_rate": 3.2438581212496723e-06, "loss": 1.2302, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1478 }, { "epoch": 0.9278544542032622, "grad_norm": 1.2507985830307007, "learning_rate": 3.2412450413556753e-06, "loss": 1.239, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1479 }, { "epoch": 0.9284818067754078, "grad_norm": 1.8706785440444946, "learning_rate": 3.2386310732883865e-06, "loss": 1.1204, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1480 }, { "epoch": 0.9291091593475533, "grad_norm": 1.7828782796859741, "learning_rate": 3.2360162201799085e-06, "loss": 1.2433, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1481 }, { "epoch": 0.9297365119196989, "grad_norm": 1.877037763595581, "learning_rate": 3.2334004851634073e-06, "loss": 1.1678, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1482 }, { "epoch": 0.9303638644918444, "grad_norm": 1.7916743755340576, "learning_rate": 3.2307838713731033e-06, "loss": 1.3769, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1483 }, { "epoch": 0.93099121706399, "grad_norm": 1.0694576501846313, "learning_rate": 3.228166381944272e-06, "loss": 1.0804, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1484 }, { "epoch": 0.9316185696361355, "grad_norm": 1.9489041566848755, "learning_rate": 3.225548020013235e-06, "loss": 1.1571, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1485 }, { "epoch": 0.9322459222082811, "grad_norm": 1.5911556482315063, "learning_rate": 3.2229287887173623e-06, "loss": 1.3908, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1486 }, { "epoch": 0.9328732747804266, "grad_norm": 1.844138741493225, "learning_rate": 3.2203086911950643e-06, "loss": 1.1133, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1487 }, { "epoch": 0.9335006273525721, "grad_norm": 1.7832318544387817, "learning_rate": 3.21768773058579e-06, "loss": 1.3063, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1488 }, { "epoch": 0.9341279799247177, "grad_norm": 1.8222450017929077, "learning_rate": 3.2150659100300215e-06, "loss": 1.2093, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1489 }, { "epoch": 0.9347553324968633, "grad_norm": 1.9622671604156494, "learning_rate": 3.2124432326692717e-06, "loss": 1.2801, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1490 }, { "epoch": 0.9353826850690088, "grad_norm": 2.0116896629333496, "learning_rate": 3.2098197016460807e-06, "loss": 1.2931, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1491 }, { "epoch": 0.9360100376411543, "grad_norm": 1.770477056503296, "learning_rate": 3.2071953201040113e-06, "loss": 1.2336, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1492 }, { "epoch": 0.9366373902132998, "grad_norm": 1.954681396484375, "learning_rate": 3.2045700911876443e-06, "loss": 1.2612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1493 }, { "epoch": 0.9372647427854455, "grad_norm": 1.5379961729049683, "learning_rate": 3.2019440180425774e-06, "loss": 1.209, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1494 }, { "epoch": 0.937892095357591, "grad_norm": 1.8084098100662231, "learning_rate": 3.1993171038154203e-06, "loss": 1.4448, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1495 }, { "epoch": 0.9385194479297365, "grad_norm": 1.5959023237228394, "learning_rate": 3.1966893516537877e-06, "loss": 1.2557, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1496 }, { "epoch": 0.939146800501882, "grad_norm": 1.8980833292007446, "learning_rate": 3.1940607647062997e-06, "loss": 1.2172, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1497 }, { "epoch": 0.9397741530740276, "grad_norm": 1.6335502862930298, "learning_rate": 3.191431346122579e-06, "loss": 1.1199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1498 }, { "epoch": 0.9404015056461732, "grad_norm": 1.936535358428955, "learning_rate": 3.1888010990532412e-06, "loss": 1.2016, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1499 }, { "epoch": 0.9410288582183187, "grad_norm": 2.046355962753296, "learning_rate": 3.1861700266498975e-06, "loss": 1.2847, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1500 }, { "epoch": 0.9416562107904642, "grad_norm": 1.6399589776992798, "learning_rate": 3.183538132065145e-06, "loss": 1.1248, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1501 }, { "epoch": 0.9422835633626098, "grad_norm": 2.1181039810180664, "learning_rate": 3.180905418452569e-06, "loss": 1.2885, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1502 }, { "epoch": 0.9429109159347553, "grad_norm": 1.5633800029754639, "learning_rate": 3.178271888966734e-06, "loss": 1.1638, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1503 }, { "epoch": 0.9435382685069009, "grad_norm": 2.5201916694641113, "learning_rate": 3.1756375467631832e-06, "loss": 1.2562, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1504 }, { "epoch": 0.9441656210790464, "grad_norm": 1.6307491064071655, "learning_rate": 3.173002394998434e-06, "loss": 1.2839, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1505 }, { "epoch": 0.944792973651192, "grad_norm": 1.7592899799346924, "learning_rate": 3.1703664368299724e-06, "loss": 1.2898, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1506 }, { "epoch": 0.9454203262233375, "grad_norm": 1.8422400951385498, "learning_rate": 3.167729675416252e-06, "loss": 1.1932, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1507 }, { "epoch": 0.946047678795483, "grad_norm": 1.5810227394104004, "learning_rate": 3.165092113916688e-06, "loss": 1.0733, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1508 }, { "epoch": 0.9466750313676286, "grad_norm": 1.9519747495651245, "learning_rate": 3.162453755491655e-06, "loss": 1.3441, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1509 }, { "epoch": 0.9473023839397742, "grad_norm": 1.8681365251541138, "learning_rate": 3.1598146033024805e-06, "loss": 1.2143, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1510 }, { "epoch": 0.9479297365119197, "grad_norm": 1.7927610874176025, "learning_rate": 3.1571746605114466e-06, "loss": 1.2172, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1511 }, { "epoch": 0.9485570890840652, "grad_norm": 1.7754586935043335, "learning_rate": 3.1545339302817796e-06, "loss": 1.1724, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1512 }, { "epoch": 0.9491844416562107, "grad_norm": 1.8852242231369019, "learning_rate": 3.1518924157776507e-06, "loss": 1.3607, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1513 }, { "epoch": 0.9498117942283564, "grad_norm": 1.8877124786376953, "learning_rate": 3.149250120164171e-06, "loss": 1.3618, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1514 }, { "epoch": 0.9504391468005019, "grad_norm": 1.8442416191101074, "learning_rate": 3.1466070466073874e-06, "loss": 1.2654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1515 }, { "epoch": 0.9510664993726474, "grad_norm": 1.7711665630340576, "learning_rate": 3.143963198274278e-06, "loss": 1.272, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1516 }, { "epoch": 0.951693851944793, "grad_norm": 2.4715964794158936, "learning_rate": 3.1413185783327503e-06, "loss": 1.2734, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1517 }, { "epoch": 0.9523212045169385, "grad_norm": 1.8979958295822144, "learning_rate": 3.1386731899516353e-06, "loss": 1.3359, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1518 }, { "epoch": 0.9529485570890841, "grad_norm": 2.25026273727417, "learning_rate": 3.136027036300687e-06, "loss": 1.3138, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1519 }, { "epoch": 0.9535759096612296, "grad_norm": 2.0142345428466797, "learning_rate": 3.1333801205505734e-06, "loss": 1.3283, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1520 }, { "epoch": 0.9542032622333751, "grad_norm": 1.6834921836853027, "learning_rate": 3.130732445872877e-06, "loss": 1.1996, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1521 }, { "epoch": 0.9548306148055207, "grad_norm": 1.9240096807479858, "learning_rate": 3.128084015440092e-06, "loss": 1.1892, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1522 }, { "epoch": 0.9554579673776662, "grad_norm": 1.3459506034851074, "learning_rate": 3.125434832425613e-06, "loss": 1.12, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1523 }, { "epoch": 0.9560853199498118, "grad_norm": 1.0296326875686646, "learning_rate": 3.122784900003742e-06, "loss": 1.1246, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1524 }, { "epoch": 0.9567126725219574, "grad_norm": 1.813663125038147, "learning_rate": 3.120134221349674e-06, "loss": 1.2281, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1525 }, { "epoch": 0.9573400250941029, "grad_norm": 1.8600475788116455, "learning_rate": 3.117482799639502e-06, "loss": 1.2991, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1526 }, { "epoch": 0.9579673776662484, "grad_norm": 1.82486891746521, "learning_rate": 3.114830638050206e-06, "loss": 1.2133, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1527 }, { "epoch": 0.958594730238394, "grad_norm": 1.6651502847671509, "learning_rate": 3.112177739759656e-06, "loss": 1.2577, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1528 }, { "epoch": 0.9592220828105396, "grad_norm": 1.9689431190490723, "learning_rate": 3.109524107946602e-06, "loss": 1.3149, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1529 }, { "epoch": 0.9598494353826851, "grad_norm": 1.5160422325134277, "learning_rate": 3.1068697457906736e-06, "loss": 1.2537, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1530 }, { "epoch": 0.9604767879548306, "grad_norm": 1.9524043798446655, "learning_rate": 3.104214656472377e-06, "loss": 1.3181, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1531 }, { "epoch": 0.9611041405269761, "grad_norm": 1.8175241947174072, "learning_rate": 3.1015588431730873e-06, "loss": 1.23, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1532 }, { "epoch": 0.9617314930991218, "grad_norm": 1.801020622253418, "learning_rate": 3.0989023090750496e-06, "loss": 1.2502, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1533 }, { "epoch": 0.9623588456712673, "grad_norm": 1.8386653661727905, "learning_rate": 3.0962450573613705e-06, "loss": 1.2601, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1534 }, { "epoch": 0.9629861982434128, "grad_norm": 3.8357949256896973, "learning_rate": 3.093587091216017e-06, "loss": 1.234, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1535 }, { "epoch": 0.9636135508155583, "grad_norm": 1.52729070186615, "learning_rate": 3.0909284138238136e-06, "loss": 1.1171, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1536 }, { "epoch": 0.9642409033877039, "grad_norm": 1.551767349243164, "learning_rate": 3.0882690283704355e-06, "loss": 1.0957, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1537 }, { "epoch": 0.9648682559598495, "grad_norm": 1.573925256729126, "learning_rate": 3.085608938042406e-06, "loss": 1.3389, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1538 }, { "epoch": 0.965495608531995, "grad_norm": 1.9069485664367676, "learning_rate": 3.0829481460270937e-06, "loss": 1.3142, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1539 }, { "epoch": 0.9661229611041405, "grad_norm": 1.6924388408660889, "learning_rate": 3.0802866555127094e-06, "loss": 1.2382, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1540 }, { "epoch": 0.966750313676286, "grad_norm": 1.9501606225967407, "learning_rate": 3.077624469688299e-06, "loss": 1.3925, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1541 }, { "epoch": 0.9673776662484316, "grad_norm": 1.637292742729187, "learning_rate": 3.074961591743741e-06, "loss": 1.1863, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1542 }, { "epoch": 0.9680050188205772, "grad_norm": 1.2631224393844604, "learning_rate": 3.0722980248697447e-06, "loss": 1.2935, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1543 }, { "epoch": 0.9686323713927227, "grad_norm": 1.5776970386505127, "learning_rate": 3.0696337722578444e-06, "loss": 1.3131, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1544 }, { "epoch": 0.9692597239648683, "grad_norm": 1.8164136409759521, "learning_rate": 3.0669688371003962e-06, "loss": 1.2615, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1545 }, { "epoch": 0.9698870765370138, "grad_norm": 1.9003275632858276, "learning_rate": 3.064303222590574e-06, "loss": 1.2848, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1546 }, { "epoch": 0.9705144291091593, "grad_norm": 1.5986956357955933, "learning_rate": 3.061636931922365e-06, "loss": 1.1825, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1547 }, { "epoch": 0.9711417816813049, "grad_norm": 1.9041141271591187, "learning_rate": 3.058969968290567e-06, "loss": 1.242, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1548 }, { "epoch": 0.9717691342534505, "grad_norm": 1.947312831878662, "learning_rate": 3.056302334890786e-06, "loss": 1.1776, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1549 }, { "epoch": 0.972396486825596, "grad_norm": 1.5843112468719482, "learning_rate": 3.053634034919428e-06, "loss": 1.1799, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1550 }, { "epoch": 0.9730238393977415, "grad_norm": 1.5609612464904785, "learning_rate": 3.0509650715736977e-06, "loss": 1.303, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1551 }, { "epoch": 0.973651191969887, "grad_norm": 1.6556776762008667, "learning_rate": 3.0482954480515963e-06, "loss": 1.1567, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1552 }, { "epoch": 0.9742785445420327, "grad_norm": 1.801066517829895, "learning_rate": 3.045625167551915e-06, "loss": 1.2528, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1553 }, { "epoch": 0.9749058971141782, "grad_norm": 1.9137632846832275, "learning_rate": 3.0429542332742322e-06, "loss": 1.2109, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1554 }, { "epoch": 0.9755332496863237, "grad_norm": 2.0313117504119873, "learning_rate": 3.040282648418911e-06, "loss": 1.226, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1555 }, { "epoch": 0.9761606022584692, "grad_norm": 1.9553405046463013, "learning_rate": 3.0376104161870922e-06, "loss": 1.3244, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1556 }, { "epoch": 0.9767879548306148, "grad_norm": 1.9397183656692505, "learning_rate": 3.0349375397806928e-06, "loss": 1.2911, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1557 }, { "epoch": 0.9774153074027604, "grad_norm": 1.5607104301452637, "learning_rate": 3.0322640224024024e-06, "loss": 1.3396, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1558 }, { "epoch": 0.9780426599749059, "grad_norm": 1.742556095123291, "learning_rate": 3.0295898672556785e-06, "loss": 1.2465, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1559 }, { "epoch": 0.9786700125470514, "grad_norm": 1.6819292306900024, "learning_rate": 3.0269150775447414e-06, "loss": 1.1875, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1560 }, { "epoch": 0.979297365119197, "grad_norm": 1.7650625705718994, "learning_rate": 3.0242396564745736e-06, "loss": 1.2395, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1561 }, { "epoch": 0.9799247176913425, "grad_norm": 1.3795490264892578, "learning_rate": 3.0215636072509134e-06, "loss": 1.1491, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1562 }, { "epoch": 0.9805520702634881, "grad_norm": 1.8588756322860718, "learning_rate": 3.0188869330802512e-06, "loss": 1.3543, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1563 }, { "epoch": 0.9811794228356336, "grad_norm": 2.0225319862365723, "learning_rate": 3.016209637169827e-06, "loss": 1.3106, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1564 }, { "epoch": 0.9818067754077792, "grad_norm": 1.8785102367401123, "learning_rate": 3.0135317227276247e-06, "loss": 1.3376, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1565 }, { "epoch": 0.9824341279799247, "grad_norm": 1.3855329751968384, "learning_rate": 3.0108531929623708e-06, "loss": 1.1439, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1566 }, { "epoch": 0.9830614805520702, "grad_norm": 1.591722011566162, "learning_rate": 3.0081740510835287e-06, "loss": 1.1241, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1567 }, { "epoch": 0.9836888331242158, "grad_norm": 1.98593008518219, "learning_rate": 3.005494300301294e-06, "loss": 1.1092, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1568 }, { "epoch": 0.9843161856963614, "grad_norm": 1.7478628158569336, "learning_rate": 3.0028139438265946e-06, "loss": 1.3299, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1569 }, { "epoch": 0.9849435382685069, "grad_norm": 1.8850418329238892, "learning_rate": 3.0001329848710803e-06, "loss": 1.2433, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1570 }, { "epoch": 0.9855708908406524, "grad_norm": 1.8818042278289795, "learning_rate": 2.9974514266471254e-06, "loss": 1.3072, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1571 }, { "epoch": 0.986198243412798, "grad_norm": 1.5135859251022339, "learning_rate": 2.994769272367822e-06, "loss": 1.4054, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1572 }, { "epoch": 0.9868255959849436, "grad_norm": 1.7623196840286255, "learning_rate": 2.9920865252469764e-06, "loss": 1.3091, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1573 }, { "epoch": 0.9874529485570891, "grad_norm": 1.6980853080749512, "learning_rate": 2.9894031884991047e-06, "loss": 1.2509, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1574 }, { "epoch": 0.9880803011292346, "grad_norm": 1.5661550760269165, "learning_rate": 2.9867192653394307e-06, "loss": 1.3949, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1575 }, { "epoch": 0.9887076537013801, "grad_norm": 1.6995539665222168, "learning_rate": 2.9840347589838786e-06, "loss": 1.3404, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1576 }, { "epoch": 0.9893350062735258, "grad_norm": 2.477058172225952, "learning_rate": 2.9813496726490732e-06, "loss": 1.1899, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1577 }, { "epoch": 0.9899623588456713, "grad_norm": 2.359335422515869, "learning_rate": 2.978664009552334e-06, "loss": 1.3828, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1578 }, { "epoch": 0.9905897114178168, "grad_norm": 1.6977730989456177, "learning_rate": 2.975977772911671e-06, "loss": 1.1621, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1579 }, { "epoch": 0.9912170639899623, "grad_norm": 1.6472805738449097, "learning_rate": 2.9732909659457827e-06, "loss": 1.3421, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1580 }, { "epoch": 0.9918444165621079, "grad_norm": 1.8243848085403442, "learning_rate": 2.970603591874049e-06, "loss": 1.3114, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1581 }, { "epoch": 0.9924717691342535, "grad_norm": 1.5195167064666748, "learning_rate": 2.967915653916531e-06, "loss": 1.1619, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1582 }, { "epoch": 0.993099121706399, "grad_norm": 1.8628544807434082, "learning_rate": 2.965227155293965e-06, "loss": 1.3999, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1583 }, { "epoch": 0.9937264742785445, "grad_norm": 1.9833089113235474, "learning_rate": 2.9625380992277585e-06, "loss": 1.4334, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1584 }, { "epoch": 0.9943538268506901, "grad_norm": 1.512103796005249, "learning_rate": 2.9598484889399873e-06, "loss": 1.2211, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1585 }, { "epoch": 0.9949811794228356, "grad_norm": 1.7887911796569824, "learning_rate": 2.9571583276533923e-06, "loss": 1.2617, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1586 }, { "epoch": 0.9956085319949812, "grad_norm": 1.7717543840408325, "learning_rate": 2.9544676185913725e-06, "loss": 1.3387, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1587 }, { "epoch": 0.9962358845671268, "grad_norm": 1.646287202835083, "learning_rate": 2.951776364977985e-06, "loss": 1.2494, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1588 }, { "epoch": 0.9968632371392723, "grad_norm": 1.721743106842041, "learning_rate": 2.949084570037939e-06, "loss": 1.402, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1589 }, { "epoch": 0.9974905897114178, "grad_norm": 1.685555338859558, "learning_rate": 2.946392236996592e-06, "loss": 1.2543, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1590 }, { "epoch": 0.9981179422835633, "grad_norm": 1.7904672622680664, "learning_rate": 2.9436993690799464e-06, "loss": 1.2328, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1591 }, { "epoch": 0.998745294855709, "grad_norm": 1.5652368068695068, "learning_rate": 2.941005969514646e-06, "loss": 1.2202, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1592 }, { "epoch": 0.9993726474278545, "grad_norm": 1.5595532655715942, "learning_rate": 2.93831204152797e-06, "loss": 1.2724, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1593 }, { "epoch": 1.0, "grad_norm": 1.7482446432113647, "learning_rate": 2.935617588347832e-06, "loss": 1.1597, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1594 }, { "epoch": 1.0006273525721456, "grad_norm": 1.098419189453125, "learning_rate": 2.932922613202777e-06, "loss": 1.273, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1595 }, { "epoch": 1.001254705144291, "grad_norm": 1.6278892755508423, "learning_rate": 2.9302271193219705e-06, "loss": 1.1618, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1596 }, { "epoch": 1.0018820577164367, "grad_norm": 1.8631848096847534, "learning_rate": 2.927531109935204e-06, "loss": 1.248, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1597 }, { "epoch": 1.002509410288582, "grad_norm": 1.707324743270874, "learning_rate": 2.924834588272884e-06, "loss": 1.1259, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1598 }, { "epoch": 1.0031367628607277, "grad_norm": 1.3775336742401123, "learning_rate": 2.922137557566032e-06, "loss": 1.2228, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1599 }, { "epoch": 1.0037641154328734, "grad_norm": 1.8195487260818481, "learning_rate": 2.9194400210462808e-06, "loss": 1.3825, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1600 }, { "epoch": 1.0043914680050188, "grad_norm": 1.2769285440444946, "learning_rate": 2.9167419819458653e-06, "loss": 1.2026, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1601 }, { "epoch": 1.0050188205771644, "grad_norm": 1.8686038255691528, "learning_rate": 2.914043443497627e-06, "loss": 1.1631, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1602 }, { "epoch": 1.0056461731493098, "grad_norm": 1.7626304626464844, "learning_rate": 2.9113444089350025e-06, "loss": 1.234, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1603 }, { "epoch": 1.0062735257214555, "grad_norm": 1.8479125499725342, "learning_rate": 2.908644881492024e-06, "loss": 1.2123, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1604 }, { "epoch": 1.006900878293601, "grad_norm": 1.9348218441009521, "learning_rate": 2.9059448644033157e-06, "loss": 1.1826, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1605 }, { "epoch": 1.0075282308657465, "grad_norm": 1.7956023216247559, "learning_rate": 2.903244360904086e-06, "loss": 1.1822, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1606 }, { "epoch": 1.0081555834378921, "grad_norm": 2.6866087913513184, "learning_rate": 2.9005433742301274e-06, "loss": 1.2719, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1607 }, { "epoch": 1.0087829360100375, "grad_norm": 1.9031373262405396, "learning_rate": 2.897841907617813e-06, "loss": 1.3587, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1608 }, { "epoch": 1.0094102885821832, "grad_norm": 1.678466796875, "learning_rate": 2.8951399643040867e-06, "loss": 1.3873, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1609 }, { "epoch": 1.0100376411543288, "grad_norm": 1.8993548154830933, "learning_rate": 2.892437547526468e-06, "loss": 1.3635, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1610 }, { "epoch": 1.0106649937264742, "grad_norm": 1.76960027217865, "learning_rate": 2.8897346605230403e-06, "loss": 1.2992, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1611 }, { "epoch": 1.0112923462986199, "grad_norm": 1.396877408027649, "learning_rate": 2.887031306532453e-06, "loss": 1.2675, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1612 }, { "epoch": 1.0119196988707653, "grad_norm": 1.9383916854858398, "learning_rate": 2.8843274887939136e-06, "loss": 1.3098, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1613 }, { "epoch": 1.012547051442911, "grad_norm": 1.8831377029418945, "learning_rate": 2.8816232105471864e-06, "loss": 1.2467, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1614 }, { "epoch": 1.0131744040150565, "grad_norm": 1.6349576711654663, "learning_rate": 2.878918475032586e-06, "loss": 1.2172, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1615 }, { "epoch": 1.013801756587202, "grad_norm": 1.781866431236267, "learning_rate": 2.876213285490976e-06, "loss": 1.2, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1616 }, { "epoch": 1.0144291091593476, "grad_norm": 1.9570376873016357, "learning_rate": 2.873507645163765e-06, "loss": 1.3187, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1617 }, { "epoch": 1.015056461731493, "grad_norm": 1.9935839176177979, "learning_rate": 2.8708015572928978e-06, "loss": 1.3265, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1618 }, { "epoch": 1.0156838143036386, "grad_norm": 6.492181777954102, "learning_rate": 2.8680950251208595e-06, "loss": 1.3201, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1619 }, { "epoch": 1.0163111668757843, "grad_norm": 1.873011589050293, "learning_rate": 2.8653880518906662e-06, "loss": 1.3138, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1620 }, { "epoch": 1.0169385194479297, "grad_norm": 1.8839685916900635, "learning_rate": 2.8626806408458626e-06, "loss": 1.2164, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1621 }, { "epoch": 1.0175658720200753, "grad_norm": 1.667243242263794, "learning_rate": 2.8599727952305173e-06, "loss": 1.4182, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1622 }, { "epoch": 1.0181932245922207, "grad_norm": 1.6265138387680054, "learning_rate": 2.8572645182892215e-06, "loss": 1.1193, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1623 }, { "epoch": 1.0188205771643664, "grad_norm": 1.573209524154663, "learning_rate": 2.8545558132670804e-06, "loss": 1.2317, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1624 }, { "epoch": 1.019447929736512, "grad_norm": 1.759530782699585, "learning_rate": 2.851846683409715e-06, "loss": 1.2441, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1625 }, { "epoch": 1.0200752823086574, "grad_norm": 1.9403883218765259, "learning_rate": 2.8491371319632515e-06, "loss": 1.288, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1626 }, { "epoch": 1.020702634880803, "grad_norm": 1.501510500907898, "learning_rate": 2.8464271621743266e-06, "loss": 1.2533, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1627 }, { "epoch": 1.0213299874529485, "grad_norm": 1.815144658088684, "learning_rate": 2.843716777290074e-06, "loss": 1.352, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1628 }, { "epoch": 1.021957340025094, "grad_norm": 1.590133547782898, "learning_rate": 2.8410059805581258e-06, "loss": 1.3409, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1629 }, { "epoch": 1.0225846925972397, "grad_norm": 1.507057547569275, "learning_rate": 2.8382947752266095e-06, "loss": 1.2165, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1630 }, { "epoch": 1.0232120451693851, "grad_norm": 1.6463191509246826, "learning_rate": 2.835583164544139e-06, "loss": 1.1584, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1631 }, { "epoch": 1.0238393977415308, "grad_norm": 1.7527943849563599, "learning_rate": 2.832871151759816e-06, "loss": 1.2905, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1632 }, { "epoch": 1.0244667503136762, "grad_norm": 1.900711178779602, "learning_rate": 2.830158740123225e-06, "loss": 1.3619, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1633 }, { "epoch": 1.0250941028858218, "grad_norm": 1.7869784832000732, "learning_rate": 2.827445932884425e-06, "loss": 1.3931, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1634 }, { "epoch": 1.0257214554579674, "grad_norm": 1.4502146244049072, "learning_rate": 2.8247327332939512e-06, "loss": 1.1494, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1635 }, { "epoch": 1.0263488080301129, "grad_norm": 2.0032737255096436, "learning_rate": 2.82201914460281e-06, "loss": 1.2799, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1636 }, { "epoch": 1.0269761606022585, "grad_norm": 1.9300953149795532, "learning_rate": 2.819305170062472e-06, "loss": 1.3849, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1637 }, { "epoch": 1.027603513174404, "grad_norm": 1.5697492361068726, "learning_rate": 2.81659081292487e-06, "loss": 1.2505, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1638 }, { "epoch": 1.0282308657465495, "grad_norm": 1.7658262252807617, "learning_rate": 2.813876076442397e-06, "loss": 1.2168, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1639 }, { "epoch": 1.0288582183186952, "grad_norm": 1.8125656843185425, "learning_rate": 2.8111609638679007e-06, "loss": 1.3612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1640 }, { "epoch": 1.0294855708908406, "grad_norm": 1.9421466588974, "learning_rate": 2.808445478454677e-06, "loss": 1.2607, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1641 }, { "epoch": 1.0301129234629862, "grad_norm": 1.39006769657135, "learning_rate": 2.805729623456469e-06, "loss": 1.1229, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1642 }, { "epoch": 1.0307402760351319, "grad_norm": 1.8550273180007935, "learning_rate": 2.8030134021274656e-06, "loss": 1.1465, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1643 }, { "epoch": 1.0313676286072773, "grad_norm": 1.9449824094772339, "learning_rate": 2.8002968177222916e-06, "loss": 1.1624, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1644 }, { "epoch": 1.031994981179423, "grad_norm": 1.8409748077392578, "learning_rate": 2.7975798734960075e-06, "loss": 1.3491, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1645 }, { "epoch": 1.0326223337515683, "grad_norm": 1.7642316818237305, "learning_rate": 2.794862572704106e-06, "loss": 1.1807, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1646 }, { "epoch": 1.033249686323714, "grad_norm": 1.9541568756103516, "learning_rate": 2.7921449186025064e-06, "loss": 1.2875, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1647 }, { "epoch": 1.0338770388958596, "grad_norm": 1.905375599861145, "learning_rate": 2.7894269144475505e-06, "loss": 1.3454, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1648 }, { "epoch": 1.034504391468005, "grad_norm": 1.6375961303710938, "learning_rate": 2.786708563496002e-06, "loss": 1.4185, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1649 }, { "epoch": 1.0351317440401506, "grad_norm": 1.8116945028305054, "learning_rate": 2.783989869005036e-06, "loss": 1.3068, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1650 }, { "epoch": 1.035759096612296, "grad_norm": 1.7843427658081055, "learning_rate": 2.7812708342322435e-06, "loss": 1.2902, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1651 }, { "epoch": 1.0363864491844417, "grad_norm": 2.144293785095215, "learning_rate": 2.778551462435621e-06, "loss": 1.2698, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1652 }, { "epoch": 1.0370138017565873, "grad_norm": 1.6094316244125366, "learning_rate": 2.775831756873568e-06, "loss": 1.1651, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1653 }, { "epoch": 1.0376411543287327, "grad_norm": 1.9586728811264038, "learning_rate": 2.7731117208048875e-06, "loss": 1.3646, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1654 }, { "epoch": 1.0382685069008784, "grad_norm": 1.6644067764282227, "learning_rate": 2.770391357488775e-06, "loss": 1.2683, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1655 }, { "epoch": 1.0388958594730238, "grad_norm": 1.9723621606826782, "learning_rate": 2.7676706701848187e-06, "loss": 1.3013, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1656 }, { "epoch": 1.0395232120451694, "grad_norm": 1.8578646183013916, "learning_rate": 2.764949662152997e-06, "loss": 1.2815, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1657 }, { "epoch": 1.040150564617315, "grad_norm": 1.8444366455078125, "learning_rate": 2.76222833665367e-06, "loss": 1.2592, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1658 }, { "epoch": 1.0407779171894604, "grad_norm": 1.4603006839752197, "learning_rate": 2.75950669694758e-06, "loss": 1.2188, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1659 }, { "epoch": 1.041405269761606, "grad_norm": 1.6441221237182617, "learning_rate": 2.7567847462958453e-06, "loss": 1.2206, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1660 }, { "epoch": 1.0420326223337515, "grad_norm": 1.8188725709915161, "learning_rate": 2.754062487959956e-06, "loss": 1.1723, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1661 }, { "epoch": 1.0426599749058971, "grad_norm": 2.0048978328704834, "learning_rate": 2.751339925201772e-06, "loss": 1.3691, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1662 }, { "epoch": 1.0432873274780428, "grad_norm": 1.4602092504501343, "learning_rate": 2.748617061283518e-06, "loss": 1.2087, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1663 }, { "epoch": 1.0439146800501882, "grad_norm": 1.841833472251892, "learning_rate": 2.7458938994677784e-06, "loss": 1.2076, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1664 }, { "epoch": 1.0445420326223338, "grad_norm": 1.5950837135314941, "learning_rate": 2.7431704430174953e-06, "loss": 1.171, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1665 }, { "epoch": 1.0451693851944792, "grad_norm": 1.6101592779159546, "learning_rate": 2.740446695195964e-06, "loss": 1.2858, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1666 }, { "epoch": 1.0457967377666249, "grad_norm": 1.9419490098953247, "learning_rate": 2.737722659266829e-06, "loss": 1.3255, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1667 }, { "epoch": 1.0464240903387705, "grad_norm": 1.777923822402954, "learning_rate": 2.734998338494079e-06, "loss": 1.1948, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1668 }, { "epoch": 1.047051442910916, "grad_norm": 1.7209194898605347, "learning_rate": 2.7322737361420454e-06, "loss": 1.169, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1669 }, { "epoch": 1.0476787954830615, "grad_norm": 2.2611277103424072, "learning_rate": 2.7295488554753957e-06, "loss": 1.2104, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1670 }, { "epoch": 1.048306148055207, "grad_norm": 1.8065840005874634, "learning_rate": 2.7268236997591312e-06, "loss": 1.2578, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1671 }, { "epoch": 1.0489335006273526, "grad_norm": 1.9297280311584473, "learning_rate": 2.724098272258584e-06, "loss": 1.2406, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1672 }, { "epoch": 1.0495608531994982, "grad_norm": 1.824525237083435, "learning_rate": 2.7213725762394104e-06, "loss": 1.3392, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1673 }, { "epoch": 1.0501882057716436, "grad_norm": 1.7247780561447144, "learning_rate": 2.718646614967589e-06, "loss": 1.1674, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1674 }, { "epoch": 1.0508155583437893, "grad_norm": 1.902340054512024, "learning_rate": 2.7159203917094164e-06, "loss": 1.2874, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1675 }, { "epoch": 1.0514429109159347, "grad_norm": 1.7102277278900146, "learning_rate": 2.7131939097315023e-06, "loss": 1.2785, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1676 }, { "epoch": 1.0520702634880803, "grad_norm": 1.567852258682251, "learning_rate": 2.710467172300768e-06, "loss": 1.3513, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1677 }, { "epoch": 1.052697616060226, "grad_norm": 1.7976839542388916, "learning_rate": 2.7077401826844384e-06, "loss": 1.1563, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1678 }, { "epoch": 1.0533249686323714, "grad_norm": 1.5595132112503052, "learning_rate": 2.7050129441500437e-06, "loss": 1.0841, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1679 }, { "epoch": 1.053952321204517, "grad_norm": 1.9650431871414185, "learning_rate": 2.70228545996541e-06, "loss": 1.234, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1680 }, { "epoch": 1.0545796737766624, "grad_norm": 2.415435552597046, "learning_rate": 2.699557733398657e-06, "loss": 1.2938, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1681 }, { "epoch": 1.055207026348808, "grad_norm": 1.3821427822113037, "learning_rate": 2.696829767718199e-06, "loss": 1.2192, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1682 }, { "epoch": 1.0558343789209537, "grad_norm": 1.729189395904541, "learning_rate": 2.694101566192733e-06, "loss": 1.2821, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1683 }, { "epoch": 1.056461731493099, "grad_norm": 1.5959374904632568, "learning_rate": 2.69137313209124e-06, "loss": 1.1041, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1684 }, { "epoch": 1.0570890840652447, "grad_norm": 1.8131731748580933, "learning_rate": 2.688644468682978e-06, "loss": 1.3402, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1685 }, { "epoch": 1.0577164366373901, "grad_norm": 1.918359637260437, "learning_rate": 2.685915579237482e-06, "loss": 1.3129, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1686 }, { "epoch": 1.0583437892095358, "grad_norm": 1.7511471509933472, "learning_rate": 2.6831864670245573e-06, "loss": 1.2815, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1687 }, { "epoch": 1.0589711417816814, "grad_norm": 1.9037708044052124, "learning_rate": 2.680457135314275e-06, "loss": 1.2278, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1688 }, { "epoch": 1.0595984943538268, "grad_norm": 1.8454982042312622, "learning_rate": 2.6777275873769703e-06, "loss": 1.233, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1689 }, { "epoch": 1.0602258469259724, "grad_norm": 1.8991624116897583, "learning_rate": 2.674997826483239e-06, "loss": 1.3211, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1690 }, { "epoch": 1.0608531994981178, "grad_norm": 1.03837251663208, "learning_rate": 2.672267855903927e-06, "loss": 1.0668, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1691 }, { "epoch": 1.0614805520702635, "grad_norm": 1.8049486875534058, "learning_rate": 2.669537678910138e-06, "loss": 1.2028, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1692 }, { "epoch": 1.0621079046424091, "grad_norm": 1.9821325540542603, "learning_rate": 2.666807298773217e-06, "loss": 1.2478, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1693 }, { "epoch": 1.0627352572145545, "grad_norm": 1.344751000404358, "learning_rate": 2.664076718764756e-06, "loss": 1.115, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1694 }, { "epoch": 1.0633626097867002, "grad_norm": 2.035903215408325, "learning_rate": 2.661345942156586e-06, "loss": 1.2145, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1695 }, { "epoch": 1.0639899623588456, "grad_norm": 1.738403558731079, "learning_rate": 2.658614972220773e-06, "loss": 1.1725, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1696 }, { "epoch": 1.0646173149309912, "grad_norm": 1.6733629703521729, "learning_rate": 2.655883812229616e-06, "loss": 1.262, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1697 }, { "epoch": 1.0652446675031368, "grad_norm": 1.5742219686508179, "learning_rate": 2.653152465455639e-06, "loss": 1.06, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1698 }, { "epoch": 1.0658720200752823, "grad_norm": 1.934395670890808, "learning_rate": 2.6504209351715914e-06, "loss": 1.2199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1699 }, { "epoch": 1.066499372647428, "grad_norm": 1.454143762588501, "learning_rate": 2.6476892246504438e-06, "loss": 1.0309, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1700 }, { "epoch": 1.0671267252195733, "grad_norm": 1.5390369892120361, "learning_rate": 2.6449573371653804e-06, "loss": 1.3156, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1701 }, { "epoch": 1.067754077791719, "grad_norm": 1.9746370315551758, "learning_rate": 2.642225275989798e-06, "loss": 1.2621, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1702 }, { "epoch": 1.0683814303638646, "grad_norm": 1.5100734233856201, "learning_rate": 2.6394930443973034e-06, "loss": 1.1465, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1703 }, { "epoch": 1.06900878293601, "grad_norm": 1.6106586456298828, "learning_rate": 2.6367606456617057e-06, "loss": 1.4355, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1704 }, { "epoch": 1.0696361355081556, "grad_norm": 1.8604997396469116, "learning_rate": 2.6340280830570142e-06, "loss": 1.2849, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1705 }, { "epoch": 1.070263488080301, "grad_norm": 1.9282605648040771, "learning_rate": 2.6312953598574353e-06, "loss": 1.238, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1706 }, { "epoch": 1.0708908406524467, "grad_norm": 2.088406801223755, "learning_rate": 2.6285624793373682e-06, "loss": 1.1732, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1707 }, { "epoch": 1.0715181932245923, "grad_norm": 1.6809911727905273, "learning_rate": 2.6258294447714e-06, "loss": 1.202, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1708 }, { "epoch": 1.0721455457967377, "grad_norm": 1.7849010229110718, "learning_rate": 2.6230962594343018e-06, "loss": 1.1261, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1709 }, { "epoch": 1.0727728983688833, "grad_norm": 1.7734084129333496, "learning_rate": 2.620362926601026e-06, "loss": 1.2661, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1710 }, { "epoch": 1.0734002509410288, "grad_norm": 1.7300902605056763, "learning_rate": 2.617629449546703e-06, "loss": 1.2376, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1711 }, { "epoch": 1.0740276035131744, "grad_norm": 1.3210912942886353, "learning_rate": 2.614895831546633e-06, "loss": 1.2699, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1712 }, { "epoch": 1.07465495608532, "grad_norm": 1.6365119218826294, "learning_rate": 2.6121620758762877e-06, "loss": 1.3032, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1713 }, { "epoch": 1.0752823086574654, "grad_norm": 1.5156607627868652, "learning_rate": 2.6094281858113026e-06, "loss": 1.233, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1714 }, { "epoch": 1.075909661229611, "grad_norm": 1.562544822692871, "learning_rate": 2.6066941646274748e-06, "loss": 1.2535, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1715 }, { "epoch": 1.0765370138017567, "grad_norm": 1.8988306522369385, "learning_rate": 2.603960015600759e-06, "loss": 1.2751, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1716 }, { "epoch": 1.0771643663739021, "grad_norm": 1.7881109714508057, "learning_rate": 2.60122574200726e-06, "loss": 1.166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1717 }, { "epoch": 1.0777917189460477, "grad_norm": 1.5304796695709229, "learning_rate": 2.5984913471232363e-06, "loss": 1.1873, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1718 }, { "epoch": 1.0784190715181932, "grad_norm": 1.8655920028686523, "learning_rate": 2.595756834225089e-06, "loss": 1.3397, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1719 }, { "epoch": 1.0790464240903388, "grad_norm": 1.7003823518753052, "learning_rate": 2.5930222065893607e-06, "loss": 1.218, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1720 }, { "epoch": 1.0796737766624842, "grad_norm": 1.684619426727295, "learning_rate": 2.590287467492732e-06, "loss": 1.3297, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1721 }, { "epoch": 1.0803011292346298, "grad_norm": 2.0093002319335938, "learning_rate": 2.5875526202120175e-06, "loss": 1.1254, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1722 }, { "epoch": 1.0809284818067755, "grad_norm": 1.9766197204589844, "learning_rate": 2.584817668024161e-06, "loss": 1.2286, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1723 }, { "epoch": 1.0815558343789209, "grad_norm": 1.893913745880127, "learning_rate": 2.5820826142062323e-06, "loss": 1.2263, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1724 }, { "epoch": 1.0821831869510665, "grad_norm": 1.637317180633545, "learning_rate": 2.5793474620354213e-06, "loss": 1.2383, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1725 }, { "epoch": 1.0828105395232122, "grad_norm": 1.6308879852294922, "learning_rate": 2.576612214789039e-06, "loss": 1.2169, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1726 }, { "epoch": 1.0834378920953576, "grad_norm": 1.8625541925430298, "learning_rate": 2.573876875744506e-06, "loss": 1.392, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1727 }, { "epoch": 1.0840652446675032, "grad_norm": 1.7185273170471191, "learning_rate": 2.5711414481793574e-06, "loss": 1.1853, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1728 }, { "epoch": 1.0846925972396486, "grad_norm": 1.9088993072509766, "learning_rate": 2.568405935371231e-06, "loss": 1.1485, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1729 }, { "epoch": 1.0853199498117942, "grad_norm": 1.4835342168807983, "learning_rate": 2.5656703405978683e-06, "loss": 1.2852, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1730 }, { "epoch": 1.0859473023839397, "grad_norm": 1.8482460975646973, "learning_rate": 2.5629346671371098e-06, "loss": 1.2489, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1731 }, { "epoch": 1.0865746549560853, "grad_norm": 1.4996140003204346, "learning_rate": 2.5601989182668875e-06, "loss": 1.1504, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1732 }, { "epoch": 1.087202007528231, "grad_norm": 1.9409292936325073, "learning_rate": 2.5574630972652263e-06, "loss": 1.1184, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1733 }, { "epoch": 1.0878293601003763, "grad_norm": 1.8671555519104004, "learning_rate": 2.5547272074102375e-06, "loss": 1.2446, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1734 }, { "epoch": 1.088456712672522, "grad_norm": 1.867174506187439, "learning_rate": 2.551991251980112e-06, "loss": 1.1564, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1735 }, { "epoch": 1.0890840652446676, "grad_norm": 1.884575366973877, "learning_rate": 2.5492552342531234e-06, "loss": 1.2545, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1736 }, { "epoch": 1.089711417816813, "grad_norm": 1.6578123569488525, "learning_rate": 2.546519157507617e-06, "loss": 1.1447, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1737 }, { "epoch": 1.0903387703889587, "grad_norm": 1.6877325773239136, "learning_rate": 2.5437830250220104e-06, "loss": 1.2048, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1738 }, { "epoch": 1.090966122961104, "grad_norm": 1.7903735637664795, "learning_rate": 2.5410468400747858e-06, "loss": 1.4386, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1739 }, { "epoch": 1.0915934755332497, "grad_norm": 1.7975722551345825, "learning_rate": 2.538310605944491e-06, "loss": 1.2991, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1740 }, { "epoch": 1.0922208281053953, "grad_norm": 1.7802077531814575, "learning_rate": 2.5355743259097314e-06, "loss": 1.1507, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1741 }, { "epoch": 1.0928481806775407, "grad_norm": 1.5844789743423462, "learning_rate": 2.532838003249168e-06, "loss": 1.1518, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1742 }, { "epoch": 1.0934755332496864, "grad_norm": 1.7955514192581177, "learning_rate": 2.5301016412415107e-06, "loss": 1.1548, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1743 }, { "epoch": 1.0941028858218318, "grad_norm": 2.168617010116577, "learning_rate": 2.5273652431655204e-06, "loss": 1.2472, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1744 }, { "epoch": 1.0947302383939774, "grad_norm": 1.5748448371887207, "learning_rate": 2.524628812299997e-06, "loss": 1.3269, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1745 }, { "epoch": 1.095357590966123, "grad_norm": 1.7519515752792358, "learning_rate": 2.5218923519237824e-06, "loss": 1.1221, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1746 }, { "epoch": 1.0959849435382685, "grad_norm": 1.8812686204910278, "learning_rate": 2.5191558653157542e-06, "loss": 1.1561, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1747 }, { "epoch": 1.096612296110414, "grad_norm": 1.7000455856323242, "learning_rate": 2.5164193557548196e-06, "loss": 1.2085, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1748 }, { "epoch": 1.0972396486825595, "grad_norm": 1.8226401805877686, "learning_rate": 2.5136828265199143e-06, "loss": 1.2332, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1749 }, { "epoch": 1.0978670012547052, "grad_norm": 1.5405991077423096, "learning_rate": 2.5109462808899976e-06, "loss": 1.3213, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1750 }, { "epoch": 1.0984943538268508, "grad_norm": 1.548893928527832, "learning_rate": 2.5082097221440484e-06, "loss": 1.2114, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1751 }, { "epoch": 1.0991217063989962, "grad_norm": 1.9334415197372437, "learning_rate": 2.5054731535610593e-06, "loss": 1.3025, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1752 }, { "epoch": 1.0997490589711418, "grad_norm": 1.8565415143966675, "learning_rate": 2.502736578420039e-06, "loss": 1.2197, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1753 }, { "epoch": 1.1003764115432872, "grad_norm": 1.8411093950271606, "learning_rate": 2.5e-06, "loss": 1.3657, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1754 }, { "epoch": 1.1010037641154329, "grad_norm": 1.4905067682266235, "learning_rate": 2.497263421579962e-06, "loss": 1.1542, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1755 }, { "epoch": 1.1016311166875785, "grad_norm": 1.9230446815490723, "learning_rate": 2.494526846438941e-06, "loss": 1.3883, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1756 }, { "epoch": 1.102258469259724, "grad_norm": 1.8002456426620483, "learning_rate": 2.4917902778559533e-06, "loss": 1.2106, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1757 }, { "epoch": 1.1028858218318696, "grad_norm": 1.7526447772979736, "learning_rate": 2.4890537191100033e-06, "loss": 1.2373, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1758 }, { "epoch": 1.103513174404015, "grad_norm": 2.3764398097991943, "learning_rate": 2.4863171734800866e-06, "loss": 1.0688, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1759 }, { "epoch": 1.1041405269761606, "grad_norm": 1.8072677850723267, "learning_rate": 2.4835806442451804e-06, "loss": 1.436, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1760 }, { "epoch": 1.1047678795483062, "grad_norm": 2.030597686767578, "learning_rate": 2.480844134684246e-06, "loss": 1.2645, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1761 }, { "epoch": 1.1053952321204517, "grad_norm": 1.8442529439926147, "learning_rate": 2.4781076480762184e-06, "loss": 1.1999, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1762 }, { "epoch": 1.1060225846925973, "grad_norm": 1.9745938777923584, "learning_rate": 2.4753711877000035e-06, "loss": 1.2658, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1763 }, { "epoch": 1.1066499372647427, "grad_norm": 1.66330087184906, "learning_rate": 2.472634756834481e-06, "loss": 1.2338, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1764 }, { "epoch": 1.1072772898368883, "grad_norm": 1.705795168876648, "learning_rate": 2.4698983587584892e-06, "loss": 1.2796, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1765 }, { "epoch": 1.107904642409034, "grad_norm": 1.7829371690750122, "learning_rate": 2.467161996750833e-06, "loss": 1.3402, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1766 }, { "epoch": 1.1085319949811794, "grad_norm": 1.7036097049713135, "learning_rate": 2.464425674090269e-06, "loss": 1.2326, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1767 }, { "epoch": 1.109159347553325, "grad_norm": 1.9317264556884766, "learning_rate": 2.4616893940555094e-06, "loss": 1.4099, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1768 }, { "epoch": 1.1097867001254704, "grad_norm": 1.7943556308746338, "learning_rate": 2.4589531599252155e-06, "loss": 1.2372, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1769 }, { "epoch": 1.110414052697616, "grad_norm": 1.7894842624664307, "learning_rate": 2.4562169749779904e-06, "loss": 1.3813, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1770 }, { "epoch": 1.1110414052697617, "grad_norm": 1.698136568069458, "learning_rate": 2.4534808424923837e-06, "loss": 1.3245, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1771 }, { "epoch": 1.111668757841907, "grad_norm": 1.8109681606292725, "learning_rate": 2.4507447657468766e-06, "loss": 1.2317, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1772 }, { "epoch": 1.1122961104140527, "grad_norm": 1.8442978858947754, "learning_rate": 2.4480087480198884e-06, "loss": 1.2614, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1773 }, { "epoch": 1.1129234629861982, "grad_norm": 1.7043442726135254, "learning_rate": 2.4452727925897633e-06, "loss": 1.132, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1774 }, { "epoch": 1.1135508155583438, "grad_norm": 2.0383293628692627, "learning_rate": 2.4425369027347746e-06, "loss": 1.4807, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1775 }, { "epoch": 1.1141781681304894, "grad_norm": 1.718584418296814, "learning_rate": 2.4398010817331133e-06, "loss": 1.1544, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1776 }, { "epoch": 1.1148055207026348, "grad_norm": 1.7924760580062866, "learning_rate": 2.4370653328628915e-06, "loss": 1.4051, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1777 }, { "epoch": 1.1154328732747805, "grad_norm": 1.5274145603179932, "learning_rate": 2.4343296594021325e-06, "loss": 1.242, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1778 }, { "epoch": 1.1160602258469259, "grad_norm": 2.0505731105804443, "learning_rate": 2.4315940646287693e-06, "loss": 1.326, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1779 }, { "epoch": 1.1166875784190715, "grad_norm": 1.7353205680847168, "learning_rate": 2.4288585518206435e-06, "loss": 1.3196, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1780 }, { "epoch": 1.1173149309912171, "grad_norm": 1.5183053016662598, "learning_rate": 2.426123124255495e-06, "loss": 1.3235, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1781 }, { "epoch": 1.1179422835633626, "grad_norm": 1.6555873155593872, "learning_rate": 2.423387785210962e-06, "loss": 1.2756, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1782 }, { "epoch": 1.1185696361355082, "grad_norm": 1.7769240140914917, "learning_rate": 2.420652537964579e-06, "loss": 1.301, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1783 }, { "epoch": 1.1191969887076536, "grad_norm": 1.6840736865997314, "learning_rate": 2.4179173857937686e-06, "loss": 1.2422, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1784 }, { "epoch": 1.1198243412797992, "grad_norm": 1.6172568798065186, "learning_rate": 2.4151823319758397e-06, "loss": 1.2463, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1785 }, { "epoch": 1.1204516938519449, "grad_norm": 1.7393953800201416, "learning_rate": 2.4124473797879825e-06, "loss": 1.2352, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1786 }, { "epoch": 1.1210790464240903, "grad_norm": 1.824742317199707, "learning_rate": 2.4097125325072687e-06, "loss": 1.212, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1787 }, { "epoch": 1.121706398996236, "grad_norm": 1.6635518074035645, "learning_rate": 2.406977793410641e-06, "loss": 1.2657, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1788 }, { "epoch": 1.1223337515683813, "grad_norm": 1.4325731992721558, "learning_rate": 2.404243165774912e-06, "loss": 1.1163, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1789 }, { "epoch": 1.122961104140527, "grad_norm": 1.8488574028015137, "learning_rate": 2.4015086528767645e-06, "loss": 1.2844, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1790 }, { "epoch": 1.1235884567126726, "grad_norm": 1.8103573322296143, "learning_rate": 2.3987742579927407e-06, "loss": 1.3157, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1791 }, { "epoch": 1.124215809284818, "grad_norm": 1.4980573654174805, "learning_rate": 2.396039984399242e-06, "loss": 1.162, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1792 }, { "epoch": 1.1248431618569636, "grad_norm": 1.831099271774292, "learning_rate": 2.393305835372525e-06, "loss": 1.2391, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1793 }, { "epoch": 1.125470514429109, "grad_norm": 1.9267017841339111, "learning_rate": 2.390571814188698e-06, "loss": 1.3137, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1794 }, { "epoch": 1.1260978670012547, "grad_norm": 1.8979977369308472, "learning_rate": 2.3878379241237136e-06, "loss": 1.2354, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1795 }, { "epoch": 1.1267252195734003, "grad_norm": 2.1855428218841553, "learning_rate": 2.3851041684533677e-06, "loss": 1.1774, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1796 }, { "epoch": 1.1273525721455457, "grad_norm": 2.137327194213867, "learning_rate": 2.382370550453298e-06, "loss": 1.2398, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1797 }, { "epoch": 1.1279799247176914, "grad_norm": 1.9005476236343384, "learning_rate": 2.379637073398974e-06, "loss": 1.2664, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1798 }, { "epoch": 1.128607277289837, "grad_norm": 1.5104117393493652, "learning_rate": 2.376903740565699e-06, "loss": 1.0942, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1799 }, { "epoch": 1.1292346298619824, "grad_norm": 1.8482791185379028, "learning_rate": 2.3741705552286007e-06, "loss": 1.2907, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1800 }, { "epoch": 1.129861982434128, "grad_norm": 1.5305474996566772, "learning_rate": 2.3714375206626326e-06, "loss": 1.184, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1801 }, { "epoch": 1.1304893350062735, "grad_norm": 1.593786358833313, "learning_rate": 2.3687046401425656e-06, "loss": 1.3543, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1802 }, { "epoch": 1.131116687578419, "grad_norm": 1.7399150133132935, "learning_rate": 2.3659719169429866e-06, "loss": 1.2495, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1803 }, { "epoch": 1.1317440401505645, "grad_norm": 1.9040228128433228, "learning_rate": 2.363239354338295e-06, "loss": 1.2781, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1804 }, { "epoch": 1.1323713927227101, "grad_norm": 3.0083515644073486, "learning_rate": 2.3605069556026966e-06, "loss": 1.1329, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1805 }, { "epoch": 1.1329987452948558, "grad_norm": 1.74836003780365, "learning_rate": 2.3577747240102024e-06, "loss": 1.2894, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1806 }, { "epoch": 1.1336260978670012, "grad_norm": 1.5725035667419434, "learning_rate": 2.3550426628346204e-06, "loss": 1.2281, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1807 }, { "epoch": 1.1342534504391468, "grad_norm": 1.735289216041565, "learning_rate": 2.352310775349557e-06, "loss": 1.2272, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1808 }, { "epoch": 1.1348808030112925, "grad_norm": 1.5815335512161255, "learning_rate": 2.349579064828409e-06, "loss": 1.225, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1809 }, { "epoch": 1.1355081555834379, "grad_norm": 1.7958688735961914, "learning_rate": 2.346847534544362e-06, "loss": 1.2673, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1810 }, { "epoch": 1.1361355081555835, "grad_norm": 1.7950382232666016, "learning_rate": 2.3441161877703855e-06, "loss": 1.2082, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1811 }, { "epoch": 1.136762860727729, "grad_norm": 1.9151309728622437, "learning_rate": 2.341385027779227e-06, "loss": 1.2292, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1812 }, { "epoch": 1.1373902132998746, "grad_norm": 1.782690405845642, "learning_rate": 2.3386540578434147e-06, "loss": 1.3128, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1813 }, { "epoch": 1.13801756587202, "grad_norm": 1.810426950454712, "learning_rate": 2.3359232812352444e-06, "loss": 1.1123, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1814 }, { "epoch": 1.1386449184441656, "grad_norm": 1.3380026817321777, "learning_rate": 2.3331927012267842e-06, "loss": 1.1458, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1815 }, { "epoch": 1.1392722710163112, "grad_norm": 1.7149876356124878, "learning_rate": 2.3304623210898634e-06, "loss": 1.2083, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1816 }, { "epoch": 1.1398996235884566, "grad_norm": 2.052325963973999, "learning_rate": 2.3277321440960733e-06, "loss": 1.1781, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1817 }, { "epoch": 1.1405269761606023, "grad_norm": 1.4271595478057861, "learning_rate": 2.3250021735167624e-06, "loss": 1.1893, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1818 }, { "epoch": 1.141154328732748, "grad_norm": 1.9418270587921143, "learning_rate": 2.3222724126230296e-06, "loss": 1.2215, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1819 }, { "epoch": 1.1417816813048933, "grad_norm": 1.9351154565811157, "learning_rate": 2.3195428646857257e-06, "loss": 1.2943, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1820 }, { "epoch": 1.142409033877039, "grad_norm": 1.6721124649047852, "learning_rate": 2.316813532975443e-06, "loss": 1.1828, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1821 }, { "epoch": 1.1430363864491844, "grad_norm": 1.749272346496582, "learning_rate": 2.3140844207625188e-06, "loss": 1.2076, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1822 }, { "epoch": 1.14366373902133, "grad_norm": 1.8967795372009277, "learning_rate": 2.3113555313170234e-06, "loss": 1.3525, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1823 }, { "epoch": 1.1442910915934754, "grad_norm": 1.8196319341659546, "learning_rate": 2.308626867908761e-06, "loss": 1.1991, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1824 }, { "epoch": 1.144918444165621, "grad_norm": 1.7723875045776367, "learning_rate": 2.3058984338072672e-06, "loss": 1.3093, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1825 }, { "epoch": 1.1455457967377667, "grad_norm": 1.8782674074172974, "learning_rate": 2.3031702322818013e-06, "loss": 1.2351, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1826 }, { "epoch": 1.146173149309912, "grad_norm": 2.256856679916382, "learning_rate": 2.3004422666013432e-06, "loss": 1.2479, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1827 }, { "epoch": 1.1468005018820577, "grad_norm": 1.7046806812286377, "learning_rate": 2.2977145400345906e-06, "loss": 1.3064, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1828 }, { "epoch": 1.1474278544542034, "grad_norm": 1.9272924661636353, "learning_rate": 2.294987055849957e-06, "loss": 1.3769, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1829 }, { "epoch": 1.1480552070263488, "grad_norm": 1.8157531023025513, "learning_rate": 2.2922598173155624e-06, "loss": 1.2539, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1830 }, { "epoch": 1.1486825595984944, "grad_norm": 1.9448930025100708, "learning_rate": 2.2895328276992325e-06, "loss": 1.2955, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1831 }, { "epoch": 1.1493099121706398, "grad_norm": 1.8604695796966553, "learning_rate": 2.2868060902684986e-06, "loss": 1.2383, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1832 }, { "epoch": 1.1499372647427855, "grad_norm": 1.7356843948364258, "learning_rate": 2.284079608290584e-06, "loss": 1.2334, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1833 }, { "epoch": 1.1505646173149309, "grad_norm": 1.9711636304855347, "learning_rate": 2.281353385032412e-06, "loss": 1.1848, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1834 }, { "epoch": 1.1511919698870765, "grad_norm": 2.1687605381011963, "learning_rate": 2.2786274237605904e-06, "loss": 1.2311, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1835 }, { "epoch": 1.1518193224592221, "grad_norm": 1.8387099504470825, "learning_rate": 2.2759017277414165e-06, "loss": 1.2324, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1836 }, { "epoch": 1.1524466750313676, "grad_norm": 1.8152990341186523, "learning_rate": 2.2731763002408696e-06, "loss": 1.2585, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1837 }, { "epoch": 1.1530740276035132, "grad_norm": 1.7368495464324951, "learning_rate": 2.270451144524605e-06, "loss": 1.1628, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1838 }, { "epoch": 1.1537013801756588, "grad_norm": 1.7837342023849487, "learning_rate": 2.2677262638579554e-06, "loss": 1.3714, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1839 }, { "epoch": 1.1543287327478042, "grad_norm": 1.7108197212219238, "learning_rate": 2.265001661505921e-06, "loss": 1.2377, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1840 }, { "epoch": 1.1549560853199499, "grad_norm": 1.7582619190216064, "learning_rate": 2.2622773407331713e-06, "loss": 1.1935, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1841 }, { "epoch": 1.1555834378920953, "grad_norm": 1.199617624282837, "learning_rate": 2.2595533048040362e-06, "loss": 1.134, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1842 }, { "epoch": 1.156210790464241, "grad_norm": 1.9170664548873901, "learning_rate": 2.256829556982505e-06, "loss": 1.3591, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1843 }, { "epoch": 1.1568381430363865, "grad_norm": 1.8503596782684326, "learning_rate": 2.254106100532223e-06, "loss": 1.1394, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1844 }, { "epoch": 1.157465495608532, "grad_norm": 1.9844796657562256, "learning_rate": 2.251382938716482e-06, "loss": 1.2423, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1845 }, { "epoch": 1.1580928481806776, "grad_norm": 1.7377749681472778, "learning_rate": 2.2486600747982286e-06, "loss": 1.1889, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1846 }, { "epoch": 1.158720200752823, "grad_norm": 1.7416249513626099, "learning_rate": 2.2459375120400444e-06, "loss": 1.2454, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1847 }, { "epoch": 1.1593475533249686, "grad_norm": 1.5563774108886719, "learning_rate": 2.2432152537041556e-06, "loss": 1.242, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1848 }, { "epoch": 1.1599749058971143, "grad_norm": 2.0742273330688477, "learning_rate": 2.240493303052421e-06, "loss": 1.2014, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1849 }, { "epoch": 1.1606022584692597, "grad_norm": 1.8404746055603027, "learning_rate": 2.237771663346331e-06, "loss": 1.2473, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1850 }, { "epoch": 1.1612296110414053, "grad_norm": 1.564107060432434, "learning_rate": 2.235050337847004e-06, "loss": 1.1768, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1851 }, { "epoch": 1.1618569636135507, "grad_norm": 1.5945340394973755, "learning_rate": 2.2323293298151817e-06, "loss": 1.1811, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1852 }, { "epoch": 1.1624843161856964, "grad_norm": 1.763053059577942, "learning_rate": 2.229608642511226e-06, "loss": 1.1327, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1853 }, { "epoch": 1.163111668757842, "grad_norm": 1.7052210569381714, "learning_rate": 2.2268882791951125e-06, "loss": 1.2935, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1854 }, { "epoch": 1.1637390213299874, "grad_norm": 2.095878839492798, "learning_rate": 2.2241682431264323e-06, "loss": 1.2472, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1855 }, { "epoch": 1.164366373902133, "grad_norm": 2.009671926498413, "learning_rate": 2.2214485375643804e-06, "loss": 1.4277, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1856 }, { "epoch": 1.1649937264742785, "grad_norm": 1.7650864124298096, "learning_rate": 2.218729165767757e-06, "loss": 1.3452, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1857 }, { "epoch": 1.165621079046424, "grad_norm": 1.6924291849136353, "learning_rate": 2.216010130994965e-06, "loss": 1.0748, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1858 }, { "epoch": 1.1662484316185697, "grad_norm": 1.9703468084335327, "learning_rate": 2.2132914365039993e-06, "loss": 1.3066, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1859 }, { "epoch": 1.1668757841907151, "grad_norm": 1.3924521207809448, "learning_rate": 2.2105730855524503e-06, "loss": 1.3009, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1860 }, { "epoch": 1.1675031367628608, "grad_norm": 1.778253197669983, "learning_rate": 2.207855081397494e-06, "loss": 1.1961, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1861 }, { "epoch": 1.1681304893350062, "grad_norm": 1.7997092008590698, "learning_rate": 2.2051374272958946e-06, "loss": 1.2506, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1862 }, { "epoch": 1.1687578419071518, "grad_norm": 1.8622349500656128, "learning_rate": 2.2024201265039933e-06, "loss": 1.2358, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1863 }, { "epoch": 1.1693851944792975, "grad_norm": 1.6936147212982178, "learning_rate": 2.1997031822777093e-06, "loss": 1.3494, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1864 }, { "epoch": 1.1700125470514429, "grad_norm": 1.8487699031829834, "learning_rate": 2.1969865978725353e-06, "loss": 1.2655, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1865 }, { "epoch": 1.1706398996235885, "grad_norm": 1.7117902040481567, "learning_rate": 2.1942703765435317e-06, "loss": 1.1697, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1866 }, { "epoch": 1.171267252195734, "grad_norm": 1.899256944656372, "learning_rate": 2.1915545215453245e-06, "loss": 1.3501, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1867 }, { "epoch": 1.1718946047678795, "grad_norm": 1.7100666761398315, "learning_rate": 2.1888390361320997e-06, "loss": 1.187, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1868 }, { "epoch": 1.1725219573400252, "grad_norm": 1.6679086685180664, "learning_rate": 2.1861239235576033e-06, "loss": 1.1233, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1869 }, { "epoch": 1.1731493099121706, "grad_norm": 1.8351097106933594, "learning_rate": 2.183409187075131e-06, "loss": 1.2701, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1870 }, { "epoch": 1.1737766624843162, "grad_norm": 1.9863923788070679, "learning_rate": 2.180694829937529e-06, "loss": 1.3121, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1871 }, { "epoch": 1.1744040150564616, "grad_norm": 1.8615024089813232, "learning_rate": 2.177980855397191e-06, "loss": 1.1982, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1872 }, { "epoch": 1.1750313676286073, "grad_norm": 1.921243667602539, "learning_rate": 2.1752672667060488e-06, "loss": 1.244, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1873 }, { "epoch": 1.175658720200753, "grad_norm": 1.8630694150924683, "learning_rate": 2.172554067115576e-06, "loss": 1.3146, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1874 }, { "epoch": 1.1762860727728983, "grad_norm": 1.8418269157409668, "learning_rate": 2.169841259876776e-06, "loss": 1.2563, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1875 }, { "epoch": 1.176913425345044, "grad_norm": 1.811339259147644, "learning_rate": 2.1671288482401842e-06, "loss": 1.2523, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1876 }, { "epoch": 1.1775407779171894, "grad_norm": 1.922473430633545, "learning_rate": 2.1644168354558623e-06, "loss": 1.1695, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1877 }, { "epoch": 1.178168130489335, "grad_norm": 1.6754716634750366, "learning_rate": 2.161705224773391e-06, "loss": 1.1444, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1878 }, { "epoch": 1.1787954830614806, "grad_norm": 2.2326772212982178, "learning_rate": 2.158994019441875e-06, "loss": 1.1346, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1879 }, { "epoch": 1.179422835633626, "grad_norm": 1.7165148258209229, "learning_rate": 2.1562832227099266e-06, "loss": 1.1899, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1880 }, { "epoch": 1.1800501882057717, "grad_norm": 1.8452473878860474, "learning_rate": 2.1535728378256742e-06, "loss": 1.1952, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1881 }, { "epoch": 1.1806775407779173, "grad_norm": 1.6404380798339844, "learning_rate": 2.150862868036749e-06, "loss": 1.2802, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1882 }, { "epoch": 1.1813048933500627, "grad_norm": 2.0825002193450928, "learning_rate": 2.1481533165902863e-06, "loss": 1.2548, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1883 }, { "epoch": 1.1819322459222084, "grad_norm": 1.647680401802063, "learning_rate": 2.1454441867329205e-06, "loss": 1.1099, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1884 }, { "epoch": 1.1825595984943538, "grad_norm": 1.8102099895477295, "learning_rate": 2.1427354817107793e-06, "loss": 1.2962, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1885 }, { "epoch": 1.1831869510664994, "grad_norm": 1.9231828451156616, "learning_rate": 2.1400272047694835e-06, "loss": 1.2266, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1886 }, { "epoch": 1.1838143036386448, "grad_norm": 1.8153226375579834, "learning_rate": 2.137319359154138e-06, "loss": 1.2095, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1887 }, { "epoch": 1.1844416562107905, "grad_norm": 1.8013314008712769, "learning_rate": 2.1346119481093346e-06, "loss": 1.1613, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1888 }, { "epoch": 1.185069008782936, "grad_norm": 1.8556623458862305, "learning_rate": 2.1319049748791418e-06, "loss": 1.2125, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1889 }, { "epoch": 1.1856963613550815, "grad_norm": 1.7150856256484985, "learning_rate": 2.129198442707103e-06, "loss": 1.2205, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1890 }, { "epoch": 1.1863237139272271, "grad_norm": 1.7657842636108398, "learning_rate": 2.1264923548362366e-06, "loss": 1.3468, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1891 }, { "epoch": 1.1869510664993728, "grad_norm": 1.9392738342285156, "learning_rate": 2.1237867145090242e-06, "loss": 1.3777, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1892 }, { "epoch": 1.1875784190715182, "grad_norm": 1.9274314641952515, "learning_rate": 2.1210815249674148e-06, "loss": 1.2644, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1893 }, { "epoch": 1.1882057716436638, "grad_norm": 1.9306237697601318, "learning_rate": 2.1183767894528135e-06, "loss": 1.3857, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1894 }, { "epoch": 1.1888331242158092, "grad_norm": 1.8561091423034668, "learning_rate": 2.1156725112060868e-06, "loss": 1.203, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1895 }, { "epoch": 1.1894604767879549, "grad_norm": 2.7982521057128906, "learning_rate": 2.112968693467548e-06, "loss": 1.2022, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1896 }, { "epoch": 1.1900878293601003, "grad_norm": 1.6578912734985352, "learning_rate": 2.1102653394769605e-06, "loss": 1.2315, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1897 }, { "epoch": 1.190715181932246, "grad_norm": 1.6461844444274902, "learning_rate": 2.1075624524735335e-06, "loss": 1.2014, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1898 }, { "epoch": 1.1913425345043915, "grad_norm": 1.4624578952789307, "learning_rate": 2.1048600356959133e-06, "loss": 1.1935, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1899 }, { "epoch": 1.191969887076537, "grad_norm": 1.8360897302627563, "learning_rate": 2.1021580923821876e-06, "loss": 1.2885, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1900 }, { "epoch": 1.1925972396486826, "grad_norm": 1.8737788200378418, "learning_rate": 2.099456625769872e-06, "loss": 1.1497, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1901 }, { "epoch": 1.1932245922208282, "grad_norm": 1.8200892210006714, "learning_rate": 2.0967556390959147e-06, "loss": 1.2012, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1902 }, { "epoch": 1.1938519447929736, "grad_norm": 1.9442538022994995, "learning_rate": 2.0940551355966856e-06, "loss": 1.5149, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1903 }, { "epoch": 1.1944792973651193, "grad_norm": 1.9775135517120361, "learning_rate": 2.0913551185079763e-06, "loss": 1.2177, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1904 }, { "epoch": 1.1951066499372647, "grad_norm": 1.7279037237167358, "learning_rate": 2.0886555910649987e-06, "loss": 1.2819, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1905 }, { "epoch": 1.1957340025094103, "grad_norm": 1.5370543003082275, "learning_rate": 2.0859565565023735e-06, "loss": 1.3726, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1906 }, { "epoch": 1.1963613550815557, "grad_norm": 1.768170952796936, "learning_rate": 2.0832580180541356e-06, "loss": 1.1752, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1907 }, { "epoch": 1.1969887076537014, "grad_norm": 1.8252557516098022, "learning_rate": 2.08055997895372e-06, "loss": 1.4465, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1908 }, { "epoch": 1.197616060225847, "grad_norm": 1.8621381521224976, "learning_rate": 2.0778624424339684e-06, "loss": 1.2775, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1909 }, { "epoch": 1.1982434127979924, "grad_norm": 2.329472780227661, "learning_rate": 2.0751654117271177e-06, "loss": 1.3053, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1910 }, { "epoch": 1.198870765370138, "grad_norm": 1.8072152137756348, "learning_rate": 2.0724688900647973e-06, "loss": 1.1612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1911 }, { "epoch": 1.1994981179422837, "grad_norm": 1.9142813682556152, "learning_rate": 2.0697728806780308e-06, "loss": 1.2612, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1912 }, { "epoch": 1.200125470514429, "grad_norm": 1.831708550453186, "learning_rate": 2.0670773867972236e-06, "loss": 1.1592, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1913 }, { "epoch": 1.2007528230865747, "grad_norm": 1.7283164262771606, "learning_rate": 2.0643824116521683e-06, "loss": 1.2807, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1914 }, { "epoch": 1.2013801756587201, "grad_norm": 1.6546218395233154, "learning_rate": 2.0616879584720305e-06, "loss": 1.3963, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1915 }, { "epoch": 1.2020075282308658, "grad_norm": 1.7884093523025513, "learning_rate": 2.0589940304853552e-06, "loss": 1.1979, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1916 }, { "epoch": 1.2026348808030112, "grad_norm": 1.6815849542617798, "learning_rate": 2.0563006309200545e-06, "loss": 1.2687, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1917 }, { "epoch": 1.2032622333751568, "grad_norm": 1.7797585725784302, "learning_rate": 2.053607763003409e-06, "loss": 1.1443, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1918 }, { "epoch": 1.2038895859473024, "grad_norm": 1.181809663772583, "learning_rate": 2.0509154299620622e-06, "loss": 1.2201, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1919 }, { "epoch": 1.2045169385194479, "grad_norm": 1.8268485069274902, "learning_rate": 2.0482236350220152e-06, "loss": 1.2838, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1920 }, { "epoch": 1.2051442910915935, "grad_norm": 1.9953694343566895, "learning_rate": 2.0455323814086284e-06, "loss": 1.1425, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1921 }, { "epoch": 1.2057716436637391, "grad_norm": 1.6402983665466309, "learning_rate": 2.042841672346608e-06, "loss": 1.1408, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1922 }, { "epoch": 1.2063989962358845, "grad_norm": 1.7828822135925293, "learning_rate": 2.040151511060013e-06, "loss": 1.2727, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1923 }, { "epoch": 1.2070263488080302, "grad_norm": 1.570185899734497, "learning_rate": 2.0374619007722423e-06, "loss": 1.2971, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1924 }, { "epoch": 1.2076537013801756, "grad_norm": 1.9676752090454102, "learning_rate": 2.034772844706036e-06, "loss": 1.3004, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1925 }, { "epoch": 1.2082810539523212, "grad_norm": 1.4364757537841797, "learning_rate": 2.0320843460834698e-06, "loss": 1.1709, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1926 }, { "epoch": 1.2089084065244666, "grad_norm": 1.668623685836792, "learning_rate": 2.0293964081259514e-06, "loss": 1.2781, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1927 }, { "epoch": 1.2095357590966123, "grad_norm": 1.8750041723251343, "learning_rate": 2.026709034054218e-06, "loss": 1.2256, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1928 }, { "epoch": 1.210163111668758, "grad_norm": 2.012606620788574, "learning_rate": 2.024022227088329e-06, "loss": 1.228, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1929 }, { "epoch": 1.2107904642409033, "grad_norm": 1.8182164430618286, "learning_rate": 2.0213359904476666e-06, "loss": 1.2232, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1930 }, { "epoch": 1.211417816813049, "grad_norm": 1.8466030359268188, "learning_rate": 2.0186503273509276e-06, "loss": 1.2221, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1931 }, { "epoch": 1.2120451693851946, "grad_norm": 1.7908083200454712, "learning_rate": 2.015965241016122e-06, "loss": 1.1342, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1932 }, { "epoch": 1.21267252195734, "grad_norm": 1.8812059164047241, "learning_rate": 2.01328073466057e-06, "loss": 1.2676, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1933 }, { "epoch": 1.2132998745294856, "grad_norm": 1.8390413522720337, "learning_rate": 2.0105968115008957e-06, "loss": 1.2211, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1934 }, { "epoch": 1.213927227101631, "grad_norm": 1.9293330907821655, "learning_rate": 2.0079134747530244e-06, "loss": 1.2886, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1935 }, { "epoch": 1.2145545796737767, "grad_norm": 1.824219822883606, "learning_rate": 2.0052307276321793e-06, "loss": 1.305, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1936 }, { "epoch": 1.2151819322459223, "grad_norm": 1.8564656972885132, "learning_rate": 2.0025485733528754e-06, "loss": 1.2049, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1937 }, { "epoch": 1.2158092848180677, "grad_norm": 1.8456517457962036, "learning_rate": 1.9998670151289214e-06, "loss": 1.3585, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1938 }, { "epoch": 1.2164366373902133, "grad_norm": 1.81951904296875, "learning_rate": 1.9971860561734062e-06, "loss": 1.1953, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1939 }, { "epoch": 1.2170639899623588, "grad_norm": 1.8625571727752686, "learning_rate": 1.994505699698706e-06, "loss": 1.2339, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1940 }, { "epoch": 1.2176913425345044, "grad_norm": 1.8038089275360107, "learning_rate": 1.9918259489164717e-06, "loss": 1.2464, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1941 }, { "epoch": 1.21831869510665, "grad_norm": 1.580041766166687, "learning_rate": 1.9891468070376297e-06, "loss": 1.1855, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1942 }, { "epoch": 1.2189460476787954, "grad_norm": 1.5931955575942993, "learning_rate": 1.9864682772723757e-06, "loss": 1.2787, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1943 }, { "epoch": 1.219573400250941, "grad_norm": 1.7477593421936035, "learning_rate": 1.983790362830174e-06, "loss": 1.3233, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1944 }, { "epoch": 1.2202007528230865, "grad_norm": 1.6215864419937134, "learning_rate": 1.9811130669197496e-06, "loss": 1.2165, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1945 }, { "epoch": 1.2208281053952321, "grad_norm": 1.593785047531128, "learning_rate": 1.978436392749087e-06, "loss": 1.1529, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1946 }, { "epoch": 1.2214554579673778, "grad_norm": 1.6797493696212769, "learning_rate": 1.975760343525427e-06, "loss": 1.227, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1947 }, { "epoch": 1.2220828105395232, "grad_norm": 2.1317989826202393, "learning_rate": 1.9730849224552586e-06, "loss": 1.1288, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1948 }, { "epoch": 1.2227101631116688, "grad_norm": 1.771586537361145, "learning_rate": 1.970410132744322e-06, "loss": 1.2009, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1949 }, { "epoch": 1.2233375156838142, "grad_norm": 1.916454553604126, "learning_rate": 1.967735977597598e-06, "loss": 1.292, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1950 }, { "epoch": 1.2239648682559598, "grad_norm": 1.910947561264038, "learning_rate": 1.965062460219308e-06, "loss": 1.2213, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1951 }, { "epoch": 1.2245922208281055, "grad_norm": 1.6686664819717407, "learning_rate": 1.9623895838129094e-06, "loss": 1.2531, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1952 }, { "epoch": 1.225219573400251, "grad_norm": 2.0595884323120117, "learning_rate": 1.9597173515810896e-06, "loss": 1.0912, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1953 }, { "epoch": 1.2258469259723965, "grad_norm": 1.720428705215454, "learning_rate": 1.9570457667257686e-06, "loss": 1.137, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1954 }, { "epoch": 1.226474278544542, "grad_norm": 1.614362120628357, "learning_rate": 1.9543748324480856e-06, "loss": 1.207, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1955 }, { "epoch": 1.2271016311166876, "grad_norm": 1.9100805521011353, "learning_rate": 1.9517045519484046e-06, "loss": 1.2895, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1956 }, { "epoch": 1.2277289836888332, "grad_norm": 2.1220719814300537, "learning_rate": 1.9490349284263036e-06, "loss": 1.2398, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1957 }, { "epoch": 1.2283563362609786, "grad_norm": 1.9832006692886353, "learning_rate": 1.946365965080573e-06, "loss": 1.2474, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1958 }, { "epoch": 1.2289836888331243, "grad_norm": 1.8225581645965576, "learning_rate": 1.9436976651092143e-06, "loss": 1.3338, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1959 }, { "epoch": 1.2296110414052697, "grad_norm": 1.7378026247024536, "learning_rate": 1.9410300317094328e-06, "loss": 1.2641, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1960 }, { "epoch": 1.2302383939774153, "grad_norm": 1.4091269969940186, "learning_rate": 1.938363068077636e-06, "loss": 1.1238, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1961 }, { "epoch": 1.230865746549561, "grad_norm": 1.8257181644439697, "learning_rate": 1.9356967774094265e-06, "loss": 1.2355, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1962 }, { "epoch": 1.2314930991217063, "grad_norm": 1.5688260793685913, "learning_rate": 1.9330311628996046e-06, "loss": 1.2014, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1963 }, { "epoch": 1.232120451693852, "grad_norm": 2.238872528076172, "learning_rate": 1.930366227742157e-06, "loss": 1.2893, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1964 }, { "epoch": 1.2327478042659976, "grad_norm": 1.6153379678726196, "learning_rate": 1.9277019751302557e-06, "loss": 1.202, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1965 }, { "epoch": 1.233375156838143, "grad_norm": 1.7979259490966797, "learning_rate": 1.9250384082562603e-06, "loss": 1.2252, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1966 }, { "epoch": 1.2340025094102887, "grad_norm": 1.8436015844345093, "learning_rate": 1.922375530311702e-06, "loss": 1.1568, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1967 }, { "epoch": 1.234629861982434, "grad_norm": 1.7746145725250244, "learning_rate": 1.919713344487291e-06, "loss": 1.1825, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1968 }, { "epoch": 1.2352572145545797, "grad_norm": 1.67526113986969, "learning_rate": 1.9170518539729063e-06, "loss": 1.1145, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1969 }, { "epoch": 1.2358845671267251, "grad_norm": 1.6781444549560547, "learning_rate": 1.9143910619575952e-06, "loss": 1.1309, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1970 }, { "epoch": 1.2365119196988708, "grad_norm": 1.8381301164627075, "learning_rate": 1.9117309716295658e-06, "loss": 1.297, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1971 }, { "epoch": 1.2371392722710164, "grad_norm": 1.8952548503875732, "learning_rate": 1.9090715861761868e-06, "loss": 1.3334, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1972 }, { "epoch": 1.2377666248431618, "grad_norm": 1.9088690280914307, "learning_rate": 1.9064129087839833e-06, "loss": 1.2053, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1973 }, { "epoch": 1.2383939774153074, "grad_norm": 1.90237557888031, "learning_rate": 1.9037549426386304e-06, "loss": 1.2269, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1974 }, { "epoch": 1.239021329987453, "grad_norm": 1.8582751750946045, "learning_rate": 1.9010976909249506e-06, "loss": 1.3578, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1975 }, { "epoch": 1.2396486825595985, "grad_norm": 1.7843849658966064, "learning_rate": 1.8984411568269126e-06, "loss": 1.2032, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1976 }, { "epoch": 1.2402760351317441, "grad_norm": 1.9194496870040894, "learning_rate": 1.8957853435276238e-06, "loss": 1.211, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1977 }, { "epoch": 1.2409033877038895, "grad_norm": 1.8976136445999146, "learning_rate": 1.8931302542093274e-06, "loss": 1.1284, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1978 }, { "epoch": 1.2415307402760352, "grad_norm": 1.9729543924331665, "learning_rate": 1.8904758920533988e-06, "loss": 1.2495, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1979 }, { "epoch": 1.2421580928481806, "grad_norm": 1.7914001941680908, "learning_rate": 1.8878222602403452e-06, "loss": 1.3332, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1980 }, { "epoch": 1.2427854454203262, "grad_norm": 1.4101675748825073, "learning_rate": 1.885169361949794e-06, "loss": 1.2431, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1981 }, { "epoch": 1.2434127979924718, "grad_norm": 1.812052845954895, "learning_rate": 1.8825172003604988e-06, "loss": 1.2351, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1982 }, { "epoch": 1.2440401505646173, "grad_norm": 1.7212128639221191, "learning_rate": 1.879865778650326e-06, "loss": 1.4615, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1983 }, { "epoch": 1.2446675031367629, "grad_norm": 1.90878164768219, "learning_rate": 1.8772150999962588e-06, "loss": 1.3332, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1984 }, { "epoch": 1.2452948557089085, "grad_norm": 1.833099365234375, "learning_rate": 1.8745651675743876e-06, "loss": 1.268, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1985 }, { "epoch": 1.245922208281054, "grad_norm": 1.850371241569519, "learning_rate": 1.8719159845599087e-06, "loss": 1.1703, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1986 }, { "epoch": 1.2465495608531996, "grad_norm": 1.7974519729614258, "learning_rate": 1.8692675541271232e-06, "loss": 1.3099, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1987 }, { "epoch": 1.247176913425345, "grad_norm": 1.7612372636795044, "learning_rate": 1.866619879449427e-06, "loss": 1.2829, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1988 }, { "epoch": 1.2478042659974906, "grad_norm": 1.8083420991897583, "learning_rate": 1.863972963699314e-06, "loss": 1.1797, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1989 }, { "epoch": 1.248431618569636, "grad_norm": 1.6637266874313354, "learning_rate": 1.8613268100483655e-06, "loss": 1.2991, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1990 }, { "epoch": 1.2490589711417817, "grad_norm": 3.0416088104248047, "learning_rate": 1.8586814216672506e-06, "loss": 1.281, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1991 }, { "epoch": 1.2496863237139273, "grad_norm": 1.5195271968841553, "learning_rate": 1.8560368017257229e-06, "loss": 1.2255, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1992 }, { "epoch": 1.2503136762860727, "grad_norm": 1.4624004364013672, "learning_rate": 1.8533929533926132e-06, "loss": 1.1726, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1993 }, { "epoch": 1.2509410288582183, "grad_norm": 1.525188684463501, "learning_rate": 1.8507498798358298e-06, "loss": 1.2229, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1994 }, { "epoch": 1.251568381430364, "grad_norm": 1.8328171968460083, "learning_rate": 1.8481075842223495e-06, "loss": 1.1667, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1995 }, { "epoch": 1.2521957340025094, "grad_norm": 1.9061431884765625, "learning_rate": 1.845466069718221e-06, "loss": 1.1872, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1996 }, { "epoch": 1.252823086574655, "grad_norm": 1.416793704032898, "learning_rate": 1.8428253394885547e-06, "loss": 1.0642, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1997 }, { "epoch": 1.2534504391468004, "grad_norm": 1.6995455026626587, "learning_rate": 1.8401853966975197e-06, "loss": 1.3359, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1998 }, { "epoch": 1.254077791718946, "grad_norm": 1.8740665912628174, "learning_rate": 1.8375462445083464e-06, "loss": 1.1768, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 1999 }, { "epoch": 1.2547051442910915, "grad_norm": 1.7629276514053345, "learning_rate": 1.8349078860833125e-06, "loss": 1.3704, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2000 }, { "epoch": 1.2553324968632371, "grad_norm": 1.8292843103408813, "learning_rate": 1.8322703245837486e-06, "loss": 1.1974, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2001 }, { "epoch": 1.2559598494353827, "grad_norm": 1.968329668045044, "learning_rate": 1.8296335631700276e-06, "loss": 1.2561, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2002 }, { "epoch": 1.2565872020075282, "grad_norm": 1.3263936042785645, "learning_rate": 1.8269976050015665e-06, "loss": 1.1638, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2003 }, { "epoch": 1.2572145545796738, "grad_norm": 1.8674527406692505, "learning_rate": 1.8243624532368176e-06, "loss": 1.353, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2004 }, { "epoch": 1.2578419071518194, "grad_norm": 1.6902563571929932, "learning_rate": 1.8217281110332667e-06, "loss": 1.1342, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2005 }, { "epoch": 1.2584692597239648, "grad_norm": 1.9965534210205078, "learning_rate": 1.8190945815474323e-06, "loss": 1.3001, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2006 }, { "epoch": 1.2590966122961105, "grad_norm": 1.8367878198623657, "learning_rate": 1.8164618679348556e-06, "loss": 1.1216, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2007 }, { "epoch": 1.2597239648682559, "grad_norm": 2.2213966846466064, "learning_rate": 1.8138299733501036e-06, "loss": 1.3812, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2008 }, { "epoch": 1.2603513174404015, "grad_norm": 1.5876895189285278, "learning_rate": 1.811198900946759e-06, "loss": 1.2787, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2009 }, { "epoch": 1.260978670012547, "grad_norm": 1.8638761043548584, "learning_rate": 1.8085686538774216e-06, "loss": 1.3114, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2010 }, { "epoch": 1.2616060225846926, "grad_norm": 1.8115777969360352, "learning_rate": 1.8059392352937011e-06, "loss": 1.1951, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2011 }, { "epoch": 1.2622333751568382, "grad_norm": 1.8829171657562256, "learning_rate": 1.8033106483462131e-06, "loss": 1.4238, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2012 }, { "epoch": 1.2628607277289836, "grad_norm": 2.542158365249634, "learning_rate": 1.8006828961845807e-06, "loss": 1.2422, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2013 }, { "epoch": 1.2634880803011292, "grad_norm": 1.9012902975082397, "learning_rate": 1.7980559819574222e-06, "loss": 1.3113, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2014 }, { "epoch": 1.2641154328732749, "grad_norm": 1.7465624809265137, "learning_rate": 1.7954299088123561e-06, "loss": 1.1354, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2015 }, { "epoch": 1.2647427854454203, "grad_norm": 1.758912444114685, "learning_rate": 1.792804679895989e-06, "loss": 1.236, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2016 }, { "epoch": 1.265370138017566, "grad_norm": 1.8036067485809326, "learning_rate": 1.7901802983539201e-06, "loss": 1.1432, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2017 }, { "epoch": 1.2659974905897113, "grad_norm": 1.4807945489883423, "learning_rate": 1.787556767330729e-06, "loss": 1.3149, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2018 }, { "epoch": 1.266624843161857, "grad_norm": 1.527632713317871, "learning_rate": 1.784934089969979e-06, "loss": 1.1745, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2019 }, { "epoch": 1.2672521957340024, "grad_norm": 1.6842902898788452, "learning_rate": 1.782312269414211e-06, "loss": 1.3125, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2020 }, { "epoch": 1.267879548306148, "grad_norm": 1.6198992729187012, "learning_rate": 1.7796913088049355e-06, "loss": 1.1504, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2021 }, { "epoch": 1.2685069008782937, "grad_norm": 1.6432766914367676, "learning_rate": 1.7770712112826383e-06, "loss": 1.3909, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2022 }, { "epoch": 1.269134253450439, "grad_norm": 1.643363356590271, "learning_rate": 1.7744519799867649e-06, "loss": 1.278, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2023 }, { "epoch": 1.2697616060225847, "grad_norm": 1.7085602283477783, "learning_rate": 1.771833618055729e-06, "loss": 1.1347, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2024 }, { "epoch": 1.2703889585947303, "grad_norm": 1.7079628705978394, "learning_rate": 1.7692161286268971e-06, "loss": 1.3693, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2025 }, { "epoch": 1.2710163111668757, "grad_norm": 1.3713947534561157, "learning_rate": 1.7665995148365934e-06, "loss": 1.1312, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2026 }, { "epoch": 1.2716436637390214, "grad_norm": 1.4894335269927979, "learning_rate": 1.7639837798200923e-06, "loss": 1.1924, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2027 }, { "epoch": 1.272271016311167, "grad_norm": 1.779098629951477, "learning_rate": 1.7613689267116141e-06, "loss": 1.2831, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2028 }, { "epoch": 1.2728983688833124, "grad_norm": 1.7018905878067017, "learning_rate": 1.7587549586443253e-06, "loss": 1.0808, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2029 }, { "epoch": 1.2735257214554578, "grad_norm": 2.0568008422851562, "learning_rate": 1.7561418787503275e-06, "loss": 1.3071, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2030 }, { "epoch": 1.2741530740276035, "grad_norm": 1.819620966911316, "learning_rate": 1.7535296901606635e-06, "loss": 1.3893, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2031 }, { "epoch": 1.274780426599749, "grad_norm": 1.7510826587677002, "learning_rate": 1.7509183960053033e-06, "loss": 1.2589, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2032 }, { "epoch": 1.2754077791718945, "grad_norm": 1.9268629550933838, "learning_rate": 1.7483079994131468e-06, "loss": 1.4034, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2033 }, { "epoch": 1.2760351317440402, "grad_norm": 1.7096502780914307, "learning_rate": 1.7456985035120194e-06, "loss": 1.2468, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2034 }, { "epoch": 1.2766624843161858, "grad_norm": 2.8108818531036377, "learning_rate": 1.7430899114286665e-06, "loss": 1.4387, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2035 }, { "epoch": 1.2772898368883312, "grad_norm": 1.732362985610962, "learning_rate": 1.740482226288751e-06, "loss": 1.0624, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2036 }, { "epoch": 1.2779171894604768, "grad_norm": 1.6320720911026, "learning_rate": 1.737875451216848e-06, "loss": 1.3641, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2037 }, { "epoch": 1.2785445420326225, "grad_norm": 1.8338778018951416, "learning_rate": 1.735269589336442e-06, "loss": 1.2858, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2038 }, { "epoch": 1.2791718946047679, "grad_norm": 1.6709052324295044, "learning_rate": 1.7326646437699262e-06, "loss": 1.3238, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2039 }, { "epoch": 1.2797992471769133, "grad_norm": 1.803065299987793, "learning_rate": 1.7300606176385914e-06, "loss": 1.3778, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2040 }, { "epoch": 1.280426599749059, "grad_norm": 1.883104681968689, "learning_rate": 1.7274575140626318e-06, "loss": 1.2621, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2041 }, { "epoch": 1.2810539523212046, "grad_norm": 1.8484208583831787, "learning_rate": 1.7248553361611313e-06, "loss": 1.2509, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2042 }, { "epoch": 1.28168130489335, "grad_norm": 1.7838290929794312, "learning_rate": 1.722254087052068e-06, "loss": 1.1035, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2043 }, { "epoch": 1.2823086574654956, "grad_norm": 1.768359899520874, "learning_rate": 1.7196537698523052e-06, "loss": 1.2978, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2044 }, { "epoch": 1.2829360100376412, "grad_norm": 1.7369588613510132, "learning_rate": 1.7170543876775898e-06, "loss": 1.4121, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2045 }, { "epoch": 1.2835633626097867, "grad_norm": 1.9081089496612549, "learning_rate": 1.71445594364255e-06, "loss": 1.0285, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2046 }, { "epoch": 1.2841907151819323, "grad_norm": 1.3868578672409058, "learning_rate": 1.7118584408606876e-06, "loss": 1.177, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2047 }, { "epoch": 1.284818067754078, "grad_norm": 1.5141382217407227, "learning_rate": 1.709261882444379e-06, "loss": 1.1907, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2048 }, { "epoch": 1.2854454203262233, "grad_norm": 1.4479154348373413, "learning_rate": 1.7066662715048668e-06, "loss": 1.1626, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2049 }, { "epoch": 1.286072772898369, "grad_norm": 1.4316198825836182, "learning_rate": 1.7040716111522592e-06, "loss": 1.1732, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2050 }, { "epoch": 1.2867001254705144, "grad_norm": 1.5437365770339966, "learning_rate": 1.7014779044955262e-06, "loss": 1.207, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2051 }, { "epoch": 1.28732747804266, "grad_norm": 1.5371419191360474, "learning_rate": 1.6988851546424934e-06, "loss": 1.1898, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2052 }, { "epoch": 1.2879548306148054, "grad_norm": 1.7966153621673584, "learning_rate": 1.6962933646998413e-06, "loss": 1.3189, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2053 }, { "epoch": 1.288582183186951, "grad_norm": 2.556145191192627, "learning_rate": 1.6937025377730992e-06, "loss": 1.1015, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2054 }, { "epoch": 1.2892095357590967, "grad_norm": 1.7997864484786987, "learning_rate": 1.6911126769666442e-06, "loss": 1.2621, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2055 }, { "epoch": 1.289836888331242, "grad_norm": 2.656108856201172, "learning_rate": 1.6885237853836932e-06, "loss": 1.3861, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2056 }, { "epoch": 1.2904642409033877, "grad_norm": 1.6520987749099731, "learning_rate": 1.6859358661263048e-06, "loss": 1.2589, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2057 }, { "epoch": 1.2910915934755334, "grad_norm": 2.0792765617370605, "learning_rate": 1.68334892229537e-06, "loss": 1.1939, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2058 }, { "epoch": 1.2917189460476788, "grad_norm": 1.6274360418319702, "learning_rate": 1.6807629569906113e-06, "loss": 1.2164, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2059 }, { "epoch": 1.2923462986198244, "grad_norm": 2.0519096851348877, "learning_rate": 1.6781779733105813e-06, "loss": 1.2378, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2060 }, { "epoch": 1.2929736511919698, "grad_norm": 1.7964249849319458, "learning_rate": 1.6755939743526516e-06, "loss": 1.1686, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2061 }, { "epoch": 1.2936010037641155, "grad_norm": 1.4359616041183472, "learning_rate": 1.6730109632130199e-06, "loss": 1.2983, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2062 }, { "epoch": 1.2942283563362609, "grad_norm": 2.8610012531280518, "learning_rate": 1.6704289429866942e-06, "loss": 1.2046, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2063 }, { "epoch": 1.2948557089084065, "grad_norm": 1.8337393999099731, "learning_rate": 1.6678479167675005e-06, "loss": 1.1787, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2064 }, { "epoch": 1.2954830614805521, "grad_norm": 1.6933302879333496, "learning_rate": 1.665267887648071e-06, "loss": 1.2077, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2065 }, { "epoch": 1.2961104140526976, "grad_norm": 1.5457978248596191, "learning_rate": 1.6626888587198414e-06, "loss": 1.3236, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2066 }, { "epoch": 1.2967377666248432, "grad_norm": 1.985788106918335, "learning_rate": 1.6601108330730536e-06, "loss": 1.2518, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2067 }, { "epoch": 1.2973651191969888, "grad_norm": 2.003683090209961, "learning_rate": 1.657533813796744e-06, "loss": 1.1951, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2068 }, { "epoch": 1.2979924717691342, "grad_norm": 1.9997378587722778, "learning_rate": 1.6549578039787436e-06, "loss": 1.2089, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2069 }, { "epoch": 1.2986198243412799, "grad_norm": 1.4930788278579712, "learning_rate": 1.6523828067056739e-06, "loss": 1.1872, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2070 }, { "epoch": 1.2992471769134253, "grad_norm": 1.7217084169387817, "learning_rate": 1.6498088250629445e-06, "loss": 1.2626, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2071 }, { "epoch": 1.299874529485571, "grad_norm": 1.5882774591445923, "learning_rate": 1.6472358621347472e-06, "loss": 1.2439, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2072 }, { "epoch": 1.3005018820577163, "grad_norm": 1.8129900693893433, "learning_rate": 1.644663921004051e-06, "loss": 1.2066, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2073 }, { "epoch": 1.301129234629862, "grad_norm": 1.7803122997283936, "learning_rate": 1.6420930047526048e-06, "loss": 1.1882, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2074 }, { "epoch": 1.3017565872020076, "grad_norm": 1.6241403818130493, "learning_rate": 1.639523116460926e-06, "loss": 1.1229, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2075 }, { "epoch": 1.302383939774153, "grad_norm": 1.4273207187652588, "learning_rate": 1.636954259208302e-06, "loss": 1.1985, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2076 }, { "epoch": 1.3030112923462986, "grad_norm": 1.567501187324524, "learning_rate": 1.6343864360727835e-06, "loss": 1.1494, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2077 }, { "epoch": 1.3036386449184443, "grad_norm": 1.8070402145385742, "learning_rate": 1.6318196501311841e-06, "loss": 1.2294, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2078 }, { "epoch": 1.3042659974905897, "grad_norm": 1.795170545578003, "learning_rate": 1.629253904459073e-06, "loss": 1.2719, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2079 }, { "epoch": 1.3048933500627353, "grad_norm": 1.7698767185211182, "learning_rate": 1.6266892021307724e-06, "loss": 1.1697, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2080 }, { "epoch": 1.3055207026348807, "grad_norm": 2.0055277347564697, "learning_rate": 1.6241255462193565e-06, "loss": 1.1996, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2081 }, { "epoch": 1.3061480552070264, "grad_norm": 1.8001643419265747, "learning_rate": 1.6215629397966432e-06, "loss": 1.231, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2082 }, { "epoch": 1.3067754077791718, "grad_norm": 1.4703590869903564, "learning_rate": 1.6190013859331958e-06, "loss": 1.2711, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2083 }, { "epoch": 1.3074027603513174, "grad_norm": 1.8987573385238647, "learning_rate": 1.616440887698313e-06, "loss": 1.2926, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2084 }, { "epoch": 1.308030112923463, "grad_norm": 1.6749411821365356, "learning_rate": 1.6138814481600324e-06, "loss": 1.1826, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2085 }, { "epoch": 1.3086574654956085, "grad_norm": 1.7081942558288574, "learning_rate": 1.6113230703851194e-06, "loss": 1.2047, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2086 }, { "epoch": 1.309284818067754, "grad_norm": 1.7994329929351807, "learning_rate": 1.6087657574390681e-06, "loss": 1.209, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2087 }, { "epoch": 1.3099121706398997, "grad_norm": 1.7203141450881958, "learning_rate": 1.6062095123860996e-06, "loss": 1.2379, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2088 }, { "epoch": 1.3105395232120451, "grad_norm": 1.7341119050979614, "learning_rate": 1.6036543382891512e-06, "loss": 1.2684, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2089 }, { "epoch": 1.3111668757841908, "grad_norm": 1.6238679885864258, "learning_rate": 1.6011002382098806e-06, "loss": 1.1199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2090 }, { "epoch": 1.3117942283563362, "grad_norm": 1.4414830207824707, "learning_rate": 1.5985472152086565e-06, "loss": 1.1536, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2091 }, { "epoch": 1.3124215809284818, "grad_norm": 1.963968276977539, "learning_rate": 1.5959952723445565e-06, "loss": 1.2495, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2092 }, { "epoch": 1.3130489335006272, "grad_norm": 2.341641902923584, "learning_rate": 1.5934444126753663e-06, "loss": 1.2921, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2093 }, { "epoch": 1.3136762860727729, "grad_norm": 1.4138715267181396, "learning_rate": 1.5908946392575713e-06, "loss": 1.1649, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2094 }, { "epoch": 1.3143036386449185, "grad_norm": 1.7310720682144165, "learning_rate": 1.5883459551463576e-06, "loss": 1.2476, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2095 }, { "epoch": 1.314930991217064, "grad_norm": 1.502193808555603, "learning_rate": 1.5857983633956027e-06, "loss": 1.1271, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2096 }, { "epoch": 1.3155583437892095, "grad_norm": 1.9654653072357178, "learning_rate": 1.5832518670578802e-06, "loss": 1.1828, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2097 }, { "epoch": 1.3161856963613552, "grad_norm": 2.250328779220581, "learning_rate": 1.580706469184447e-06, "loss": 1.1769, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2098 }, { "epoch": 1.3168130489335006, "grad_norm": 1.8096691370010376, "learning_rate": 1.5781621728252439e-06, "loss": 1.1981, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2099 }, { "epoch": 1.3174404015056462, "grad_norm": 1.3865689039230347, "learning_rate": 1.5756189810288952e-06, "loss": 1.1288, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2100 }, { "epoch": 1.3180677540777916, "grad_norm": 1.7595837116241455, "learning_rate": 1.5730768968426985e-06, "loss": 1.3046, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2101 }, { "epoch": 1.3186951066499373, "grad_norm": 2.089447498321533, "learning_rate": 1.5705359233126255e-06, "loss": 1.3244, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2102 }, { "epoch": 1.3193224592220827, "grad_norm": 1.8050563335418701, "learning_rate": 1.5679960634833164e-06, "loss": 1.2567, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2103 }, { "epoch": 1.3199498117942283, "grad_norm": 1.724447250366211, "learning_rate": 1.5654573203980782e-06, "loss": 1.1666, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2104 }, { "epoch": 1.320577164366374, "grad_norm": 1.9797168970108032, "learning_rate": 1.5629196970988791e-06, "loss": 1.1802, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2105 }, { "epoch": 1.3212045169385194, "grad_norm": 1.887502908706665, "learning_rate": 1.5603831966263444e-06, "loss": 1.3445, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2106 }, { "epoch": 1.321831869510665, "grad_norm": 1.614529013633728, "learning_rate": 1.5578478220197568e-06, "loss": 1.3333, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2107 }, { "epoch": 1.3224592220828106, "grad_norm": 1.4517871141433716, "learning_rate": 1.5553135763170466e-06, "loss": 1.3709, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2108 }, { "epoch": 1.323086574654956, "grad_norm": 1.386285662651062, "learning_rate": 1.5527804625547937e-06, "loss": 1.31, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2109 }, { "epoch": 1.3237139272271017, "grad_norm": 1.4335236549377441, "learning_rate": 1.5502484837682216e-06, "loss": 1.1913, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2110 }, { "epoch": 1.324341279799247, "grad_norm": 1.60616934299469, "learning_rate": 1.5477176429911934e-06, "loss": 1.3257, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2111 }, { "epoch": 1.3249686323713927, "grad_norm": 1.5817228555679321, "learning_rate": 1.5451879432562078e-06, "loss": 1.1697, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2112 }, { "epoch": 1.3255959849435381, "grad_norm": 1.670749306678772, "learning_rate": 1.5426593875943967e-06, "loss": 1.1722, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2113 }, { "epoch": 1.3262233375156838, "grad_norm": 2.0000078678131104, "learning_rate": 1.5401319790355232e-06, "loss": 1.2758, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2114 }, { "epoch": 1.3268506900878294, "grad_norm": 1.9140396118164062, "learning_rate": 1.537605720607972e-06, "loss": 1.252, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2115 }, { "epoch": 1.3274780426599748, "grad_norm": 1.599395990371704, "learning_rate": 1.5350806153387541e-06, "loss": 1.2188, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2116 }, { "epoch": 1.3281053952321205, "grad_norm": 1.6747828722000122, "learning_rate": 1.5325566662534948e-06, "loss": 1.3726, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2117 }, { "epoch": 1.328732747804266, "grad_norm": 1.457871675491333, "learning_rate": 1.5300338763764371e-06, "loss": 1.2079, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2118 }, { "epoch": 1.3293601003764115, "grad_norm": 1.839821219444275, "learning_rate": 1.5275122487304337e-06, "loss": 1.0779, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2119 }, { "epoch": 1.3299874529485571, "grad_norm": 1.8094121217727661, "learning_rate": 1.5249917863369426e-06, "loss": 1.3537, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2120 }, { "epoch": 1.3306148055207028, "grad_norm": 1.6778947114944458, "learning_rate": 1.5224724922160304e-06, "loss": 1.272, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2121 }, { "epoch": 1.3312421580928482, "grad_norm": 1.0478399991989136, "learning_rate": 1.519954369386359e-06, "loss": 1.094, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2122 }, { "epoch": 1.3318695106649936, "grad_norm": 1.8125706911087036, "learning_rate": 1.5174374208651913e-06, "loss": 1.3052, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2123 }, { "epoch": 1.3324968632371392, "grad_norm": 1.7263497114181519, "learning_rate": 1.5149216496683788e-06, "loss": 1.1937, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2124 }, { "epoch": 1.3331242158092849, "grad_norm": 1.8608651161193848, "learning_rate": 1.5124070588103648e-06, "loss": 1.2324, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2125 }, { "epoch": 1.3337515683814303, "grad_norm": 1.5468758344650269, "learning_rate": 1.5098936513041787e-06, "loss": 1.167, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2126 }, { "epoch": 1.334378920953576, "grad_norm": 1.8814101219177246, "learning_rate": 1.5073814301614298e-06, "loss": 1.1359, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2127 }, { "epoch": 1.3350062735257215, "grad_norm": 1.8458728790283203, "learning_rate": 1.5048703983923086e-06, "loss": 1.2031, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2128 }, { "epoch": 1.335633626097867, "grad_norm": 1.2960106134414673, "learning_rate": 1.5023605590055768e-06, "loss": 1.134, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2129 }, { "epoch": 1.3362609786700126, "grad_norm": 2.0509815216064453, "learning_rate": 1.4998519150085722e-06, "loss": 1.2158, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2130 }, { "epoch": 1.3368883312421582, "grad_norm": 1.7239158153533936, "learning_rate": 1.4973444694071957e-06, "loss": 1.199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2131 }, { "epoch": 1.3375156838143036, "grad_norm": 1.9156348705291748, "learning_rate": 1.4948382252059158e-06, "loss": 1.2761, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2132 }, { "epoch": 1.338143036386449, "grad_norm": 1.6581004858016968, "learning_rate": 1.4923331854077592e-06, "loss": 1.3608, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2133 }, { "epoch": 1.3387703889585947, "grad_norm": 1.7229657173156738, "learning_rate": 1.4898293530143095e-06, "loss": 1.1478, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2134 }, { "epoch": 1.3393977415307403, "grad_norm": 1.7327300310134888, "learning_rate": 1.4873267310257056e-06, "loss": 1.1457, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2135 }, { "epoch": 1.3400250941028857, "grad_norm": 2.0124669075012207, "learning_rate": 1.4848253224406326e-06, "loss": 1.1386, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2136 }, { "epoch": 1.3406524466750314, "grad_norm": 1.6715619564056396, "learning_rate": 1.482325130256326e-06, "loss": 1.2899, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2137 }, { "epoch": 1.341279799247177, "grad_norm": 2.0009779930114746, "learning_rate": 1.4798261574685597e-06, "loss": 1.1755, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2138 }, { "epoch": 1.3419071518193224, "grad_norm": 1.9581701755523682, "learning_rate": 1.4773284070716504e-06, "loss": 1.2387, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2139 }, { "epoch": 1.342534504391468, "grad_norm": 1.7659634351730347, "learning_rate": 1.4748318820584468e-06, "loss": 1.228, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2140 }, { "epoch": 1.3431618569636137, "grad_norm": 2.4370534420013428, "learning_rate": 1.4723365854203298e-06, "loss": 1.2194, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2141 }, { "epoch": 1.343789209535759, "grad_norm": 2.0025267601013184, "learning_rate": 1.4698425201472111e-06, "loss": 1.1692, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2142 }, { "epoch": 1.3444165621079047, "grad_norm": 1.8108923435211182, "learning_rate": 1.4673496892275237e-06, "loss": 1.3146, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2143 }, { "epoch": 1.3450439146800501, "grad_norm": 1.5675235986709595, "learning_rate": 1.4648580956482238e-06, "loss": 1.0542, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2144 }, { "epoch": 1.3456712672521958, "grad_norm": 1.8576399087905884, "learning_rate": 1.4623677423947841e-06, "loss": 1.2311, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2145 }, { "epoch": 1.3462986198243412, "grad_norm": 1.8584723472595215, "learning_rate": 1.4598786324511892e-06, "loss": 1.0874, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2146 }, { "epoch": 1.3469259723964868, "grad_norm": 2.055356979370117, "learning_rate": 1.4573907687999383e-06, "loss": 1.2173, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2147 }, { "epoch": 1.3475533249686324, "grad_norm": 1.7574433088302612, "learning_rate": 1.4549041544220347e-06, "loss": 1.3364, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2148 }, { "epoch": 1.3481806775407779, "grad_norm": 1.7991924285888672, "learning_rate": 1.452418792296984e-06, "loss": 1.1384, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2149 }, { "epoch": 1.3488080301129235, "grad_norm": 1.873650074005127, "learning_rate": 1.4499346854027921e-06, "loss": 1.2564, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2150 }, { "epoch": 1.3494353826850691, "grad_norm": 1.9361118078231812, "learning_rate": 1.4474518367159623e-06, "loss": 1.3539, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2151 }, { "epoch": 1.3500627352572145, "grad_norm": 1.726911187171936, "learning_rate": 1.4449702492114886e-06, "loss": 1.1937, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2152 }, { "epoch": 1.3506900878293602, "grad_norm": 1.8655195236206055, "learning_rate": 1.4424899258628533e-06, "loss": 1.2017, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2153 }, { "epoch": 1.3513174404015056, "grad_norm": 1.840174674987793, "learning_rate": 1.4400108696420265e-06, "loss": 1.2439, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2154 }, { "epoch": 1.3519447929736512, "grad_norm": 1.859181523323059, "learning_rate": 1.4375330835194568e-06, "loss": 1.3436, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2155 }, { "epoch": 1.3525721455457966, "grad_norm": 1.7906042337417603, "learning_rate": 1.4350565704640746e-06, "loss": 1.1164, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2156 }, { "epoch": 1.3531994981179423, "grad_norm": 1.8030601739883423, "learning_rate": 1.4325813334432805e-06, "loss": 1.1413, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2157 }, { "epoch": 1.353826850690088, "grad_norm": 1.8038133382797241, "learning_rate": 1.4301073754229511e-06, "loss": 1.3058, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2158 }, { "epoch": 1.3544542032622333, "grad_norm": 6.459727764129639, "learning_rate": 1.4276346993674267e-06, "loss": 1.3058, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2159 }, { "epoch": 1.355081555834379, "grad_norm": 1.8177783489227295, "learning_rate": 1.4251633082395117e-06, "loss": 1.2954, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2160 }, { "epoch": 1.3557089084065246, "grad_norm": 1.9179670810699463, "learning_rate": 1.4226932050004735e-06, "loss": 1.2445, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2161 }, { "epoch": 1.35633626097867, "grad_norm": 1.9530935287475586, "learning_rate": 1.4202243926100345e-06, "loss": 1.2474, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2162 }, { "epoch": 1.3569636135508156, "grad_norm": 1.547910213470459, "learning_rate": 1.4177568740263692e-06, "loss": 1.3921, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2163 }, { "epoch": 1.357590966122961, "grad_norm": 1.682190179824829, "learning_rate": 1.415290652206105e-06, "loss": 1.1747, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2164 }, { "epoch": 1.3582183186951067, "grad_norm": 1.6370090246200562, "learning_rate": 1.412825730104313e-06, "loss": 1.148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2165 }, { "epoch": 1.358845671267252, "grad_norm": 1.9257128238677979, "learning_rate": 1.410362110674508e-06, "loss": 1.2593, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2166 }, { "epoch": 1.3594730238393977, "grad_norm": 1.7159314155578613, "learning_rate": 1.4078997968686425e-06, "loss": 1.3904, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2167 }, { "epoch": 1.3601003764115434, "grad_norm": 1.8190382719039917, "learning_rate": 1.405438791637107e-06, "loss": 1.3654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2168 }, { "epoch": 1.3607277289836888, "grad_norm": 1.6482884883880615, "learning_rate": 1.4029790979287217e-06, "loss": 1.1206, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2169 }, { "epoch": 1.3613550815558344, "grad_norm": 1.9584014415740967, "learning_rate": 1.400520718690738e-06, "loss": 1.1457, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2170 }, { "epoch": 1.36198243412798, "grad_norm": 1.7346889972686768, "learning_rate": 1.3980636568688283e-06, "loss": 1.2116, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2171 }, { "epoch": 1.3626097867001254, "grad_norm": 1.715471625328064, "learning_rate": 1.395607915407091e-06, "loss": 1.2171, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2172 }, { "epoch": 1.363237139272271, "grad_norm": 1.783653736114502, "learning_rate": 1.3931534972480392e-06, "loss": 1.2286, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2173 }, { "epoch": 1.3638644918444165, "grad_norm": 1.8066835403442383, "learning_rate": 1.3907004053326006e-06, "loss": 1.1425, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2174 }, { "epoch": 1.3644918444165621, "grad_norm": 1.775204062461853, "learning_rate": 1.3882486426001162e-06, "loss": 1.2009, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2175 }, { "epoch": 1.3651191969887075, "grad_norm": 1.8585458993911743, "learning_rate": 1.3857982119883313e-06, "loss": 1.2242, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2176 }, { "epoch": 1.3657465495608532, "grad_norm": 1.6320204734802246, "learning_rate": 1.3833491164333974e-06, "loss": 1.2934, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2177 }, { "epoch": 1.3663739021329988, "grad_norm": 1.6733769178390503, "learning_rate": 1.380901358869865e-06, "loss": 1.1086, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2178 }, { "epoch": 1.3670012547051442, "grad_norm": 1.6722503900527954, "learning_rate": 1.3784549422306808e-06, "loss": 1.0616, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2179 }, { "epoch": 1.3676286072772899, "grad_norm": 1.600703477859497, "learning_rate": 1.3760098694471876e-06, "loss": 1.1497, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2180 }, { "epoch": 1.3682559598494355, "grad_norm": 2.0433199405670166, "learning_rate": 1.373566143449115e-06, "loss": 1.3881, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2181 }, { "epoch": 1.368883312421581, "grad_norm": 1.7326481342315674, "learning_rate": 1.3711237671645802e-06, "loss": 1.2705, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2182 }, { "epoch": 1.3695106649937265, "grad_norm": 1.9093871116638184, "learning_rate": 1.3686827435200817e-06, "loss": 1.2108, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2183 }, { "epoch": 1.370138017565872, "grad_norm": 1.912520170211792, "learning_rate": 1.3662430754405004e-06, "loss": 1.1256, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2184 }, { "epoch": 1.3707653701380176, "grad_norm": 1.5855059623718262, "learning_rate": 1.363804765849089e-06, "loss": 1.2166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2185 }, { "epoch": 1.371392722710163, "grad_norm": 1.4968267679214478, "learning_rate": 1.361367817667476e-06, "loss": 1.3215, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2186 }, { "epoch": 1.3720200752823086, "grad_norm": 1.8088436126708984, "learning_rate": 1.3589322338156571e-06, "loss": 1.2745, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2187 }, { "epoch": 1.3726474278544543, "grad_norm": 1.8022375106811523, "learning_rate": 1.3564980172119913e-06, "loss": 1.2414, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2188 }, { "epoch": 1.3732747804265997, "grad_norm": 1.7805216312408447, "learning_rate": 1.3540651707732036e-06, "loss": 1.2126, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2189 }, { "epoch": 1.3739021329987453, "grad_norm": 1.4937478303909302, "learning_rate": 1.3516336974143726e-06, "loss": 1.0723, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2190 }, { "epoch": 1.374529485570891, "grad_norm": 1.806618571281433, "learning_rate": 1.3492036000489361e-06, "loss": 1.2422, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2191 }, { "epoch": 1.3751568381430364, "grad_norm": 1.7576457262039185, "learning_rate": 1.34677488158868e-06, "loss": 1.2792, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2192 }, { "epoch": 1.375784190715182, "grad_norm": 2.2949795722961426, "learning_rate": 1.3443475449437377e-06, "loss": 1.14, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2193 }, { "epoch": 1.3764115432873274, "grad_norm": 1.6671180725097656, "learning_rate": 1.3419215930225898e-06, "loss": 1.2154, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2194 }, { "epoch": 1.377038895859473, "grad_norm": 1.8693598508834839, "learning_rate": 1.3394970287320553e-06, "loss": 1.1688, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2195 }, { "epoch": 1.3776662484316184, "grad_norm": 1.7913018465042114, "learning_rate": 1.3370738549772921e-06, "loss": 1.2461, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2196 }, { "epoch": 1.378293601003764, "grad_norm": 1.840532898902893, "learning_rate": 1.3346520746617903e-06, "loss": 1.2051, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2197 }, { "epoch": 1.3789209535759097, "grad_norm": 1.9473389387130737, "learning_rate": 1.3322316906873712e-06, "loss": 1.1106, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2198 }, { "epoch": 1.3795483061480551, "grad_norm": 1.4682437181472778, "learning_rate": 1.329812705954183e-06, "loss": 1.1503, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2199 }, { "epoch": 1.3801756587202008, "grad_norm": 1.787130355834961, "learning_rate": 1.327395123360696e-06, "loss": 1.2355, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2200 }, { "epoch": 1.3808030112923464, "grad_norm": 1.6980496644973755, "learning_rate": 1.324978945803704e-06, "loss": 1.1478, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2201 }, { "epoch": 1.3814303638644918, "grad_norm": 1.329012393951416, "learning_rate": 1.3225641761783126e-06, "loss": 1.2075, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2202 }, { "epoch": 1.3820577164366374, "grad_norm": 2.6499478816986084, "learning_rate": 1.320150817377945e-06, "loss": 1.2777, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2203 }, { "epoch": 1.382685069008783, "grad_norm": 2.159608840942383, "learning_rate": 1.317738872294329e-06, "loss": 1.1045, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2204 }, { "epoch": 1.3833124215809285, "grad_norm": 1.7414979934692383, "learning_rate": 1.3153283438175036e-06, "loss": 1.3571, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2205 }, { "epoch": 1.383939774153074, "grad_norm": 1.630829095840454, "learning_rate": 1.3129192348358066e-06, "loss": 1.1684, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2206 }, { "epoch": 1.3845671267252195, "grad_norm": 1.8356767892837524, "learning_rate": 1.3105115482358758e-06, "loss": 1.1264, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2207 }, { "epoch": 1.3851944792973652, "grad_norm": 1.557112455368042, "learning_rate": 1.308105286902646e-06, "loss": 1.0991, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2208 }, { "epoch": 1.3858218318695106, "grad_norm": 1.9413764476776123, "learning_rate": 1.3057004537193424e-06, "loss": 1.252, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2209 }, { "epoch": 1.3864491844416562, "grad_norm": 1.818488359451294, "learning_rate": 1.3032970515674814e-06, "loss": 1.3677, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2210 }, { "epoch": 1.3870765370138018, "grad_norm": 1.8553251028060913, "learning_rate": 1.3008950833268609e-06, "loss": 1.136, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2211 }, { "epoch": 1.3877038895859473, "grad_norm": 1.7978549003601074, "learning_rate": 1.2984945518755654e-06, "loss": 1.1121, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2212 }, { "epoch": 1.388331242158093, "grad_norm": 1.7926452159881592, "learning_rate": 1.2960954600899539e-06, "loss": 1.122, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2213 }, { "epoch": 1.3889585947302385, "grad_norm": 1.7817319631576538, "learning_rate": 1.2936978108446624e-06, "loss": 1.1541, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2214 }, { "epoch": 1.389585947302384, "grad_norm": 1.463486671447754, "learning_rate": 1.2913016070125971e-06, "loss": 1.0774, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2215 }, { "epoch": 1.3902132998745294, "grad_norm": 2.401296377182007, "learning_rate": 1.2889068514649328e-06, "loss": 1.2591, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2216 }, { "epoch": 1.390840652446675, "grad_norm": 1.7470189332962036, "learning_rate": 1.2865135470711107e-06, "loss": 1.2515, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2217 }, { "epoch": 1.3914680050188206, "grad_norm": 1.9201079607009888, "learning_rate": 1.2841216966988295e-06, "loss": 1.2297, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2218 }, { "epoch": 1.392095357590966, "grad_norm": 1.6405366659164429, "learning_rate": 1.2817313032140504e-06, "loss": 1.2744, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2219 }, { "epoch": 1.3927227101631117, "grad_norm": 2.254488468170166, "learning_rate": 1.2793423694809854e-06, "loss": 1.1555, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2220 }, { "epoch": 1.3933500627352573, "grad_norm": 1.7753210067749023, "learning_rate": 1.2769548983620963e-06, "loss": 1.1457, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2221 }, { "epoch": 1.3939774153074027, "grad_norm": 1.7737972736358643, "learning_rate": 1.2745688927180974e-06, "loss": 1.3371, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2222 }, { "epoch": 1.3946047678795483, "grad_norm": 1.9211421012878418, "learning_rate": 1.2721843554079418e-06, "loss": 1.2411, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2223 }, { "epoch": 1.395232120451694, "grad_norm": 2.360780954360962, "learning_rate": 1.2698012892888272e-06, "loss": 1.2071, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2224 }, { "epoch": 1.3958594730238394, "grad_norm": 1.9631240367889404, "learning_rate": 1.2674196972161856e-06, "loss": 1.2879, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2225 }, { "epoch": 1.396486825595985, "grad_norm": 1.9129877090454102, "learning_rate": 1.265039582043685e-06, "loss": 1.2065, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2226 }, { "epoch": 1.3971141781681304, "grad_norm": 1.8536701202392578, "learning_rate": 1.262660946623222e-06, "loss": 1.134, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2227 }, { "epoch": 1.397741530740276, "grad_norm": 2.0723443031311035, "learning_rate": 1.2602837938049204e-06, "loss": 1.4661, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2228 }, { "epoch": 1.3983688833124215, "grad_norm": 1.4898669719696045, "learning_rate": 1.257908126437129e-06, "loss": 1.0878, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2229 }, { "epoch": 1.3989962358845671, "grad_norm": 1.8184411525726318, "learning_rate": 1.2555339473664151e-06, "loss": 1.1638, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2230 }, { "epoch": 1.3996235884567128, "grad_norm": 1.7882477045059204, "learning_rate": 1.2531612594375636e-06, "loss": 1.2015, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2231 }, { "epoch": 1.4002509410288582, "grad_norm": 1.7373894453048706, "learning_rate": 1.2507900654935706e-06, "loss": 1.2848, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2232 }, { "epoch": 1.4008782936010038, "grad_norm": 1.6618754863739014, "learning_rate": 1.2484203683756466e-06, "loss": 1.1881, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2233 }, { "epoch": 1.4015056461731494, "grad_norm": 1.8542975187301636, "learning_rate": 1.2460521709232042e-06, "loss": 1.2826, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2234 }, { "epoch": 1.4021329987452948, "grad_norm": 1.7895851135253906, "learning_rate": 1.24368547597386e-06, "loss": 1.2008, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2235 }, { "epoch": 1.4027603513174405, "grad_norm": 1.808781623840332, "learning_rate": 1.2413202863634328e-06, "loss": 1.2159, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2236 }, { "epoch": 1.403387703889586, "grad_norm": 1.8326939344406128, "learning_rate": 1.238956604925934e-06, "loss": 1.3503, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2237 }, { "epoch": 1.4040150564617315, "grad_norm": 1.768405795097351, "learning_rate": 1.236594434493571e-06, "loss": 1.2804, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2238 }, { "epoch": 1.404642409033877, "grad_norm": 1.4438402652740479, "learning_rate": 1.2342337778967383e-06, "loss": 1.068, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2239 }, { "epoch": 1.4052697616060226, "grad_norm": 1.7598780393600464, "learning_rate": 1.2318746379640192e-06, "loss": 1.2313, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2240 }, { "epoch": 1.4058971141781682, "grad_norm": 1.9519436359405518, "learning_rate": 1.229517017522177e-06, "loss": 1.1764, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2241 }, { "epoch": 1.4065244667503136, "grad_norm": 1.7161890268325806, "learning_rate": 1.2271609193961544e-06, "loss": 1.2437, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2242 }, { "epoch": 1.4071518193224593, "grad_norm": 1.7799817323684692, "learning_rate": 1.2248063464090733e-06, "loss": 1.287, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2243 }, { "epoch": 1.4077791718946049, "grad_norm": 1.674679160118103, "learning_rate": 1.2224533013822237e-06, "loss": 1.4167, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2244 }, { "epoch": 1.4084065244667503, "grad_norm": 1.686480164527893, "learning_rate": 1.2201017871350687e-06, "loss": 1.1705, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2245 }, { "epoch": 1.409033877038896, "grad_norm": 1.886971354484558, "learning_rate": 1.217751806485235e-06, "loss": 1.2974, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2246 }, { "epoch": 1.4096612296110413, "grad_norm": 1.6448758840560913, "learning_rate": 1.2154033622485114e-06, "loss": 1.1193, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2247 }, { "epoch": 1.410288582183187, "grad_norm": 1.8690279722213745, "learning_rate": 1.2130564572388476e-06, "loss": 1.2275, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2248 }, { "epoch": 1.4109159347553324, "grad_norm": 2.0430450439453125, "learning_rate": 1.2107110942683459e-06, "loss": 1.3148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2249 }, { "epoch": 1.411543287327478, "grad_norm": 1.8752175569534302, "learning_rate": 1.208367276147265e-06, "loss": 1.2259, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2250 }, { "epoch": 1.4121706398996237, "grad_norm": 1.7567509412765503, "learning_rate": 1.206025005684009e-06, "loss": 1.0586, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2251 }, { "epoch": 1.412797992471769, "grad_norm": 1.49163818359375, "learning_rate": 1.2036842856851303e-06, "loss": 1.1927, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2252 }, { "epoch": 1.4134253450439147, "grad_norm": 2.017422914505005, "learning_rate": 1.2013451189553217e-06, "loss": 1.3638, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2253 }, { "epoch": 1.4140526976160603, "grad_norm": 1.748540997505188, "learning_rate": 1.199007508297414e-06, "loss": 1.2116, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2254 }, { "epoch": 1.4146800501882058, "grad_norm": 2.0524532794952393, "learning_rate": 1.196671456512377e-06, "loss": 1.2057, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2255 }, { "epoch": 1.4153074027603514, "grad_norm": 2.4321794509887695, "learning_rate": 1.1943369663993083e-06, "loss": 1.1794, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2256 }, { "epoch": 1.4159347553324968, "grad_norm": 1.8823251724243164, "learning_rate": 1.1920040407554384e-06, "loss": 1.2584, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2257 }, { "epoch": 1.4165621079046424, "grad_norm": 1.7821834087371826, "learning_rate": 1.1896726823761195e-06, "loss": 1.3296, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2258 }, { "epoch": 1.4171894604767878, "grad_norm": 1.4383490085601807, "learning_rate": 1.1873428940548293e-06, "loss": 1.222, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2259 }, { "epoch": 1.4178168130489335, "grad_norm": 1.8384606838226318, "learning_rate": 1.1850146785831618e-06, "loss": 1.2738, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2260 }, { "epoch": 1.4184441656210791, "grad_norm": 1.805746078491211, "learning_rate": 1.1826880387508258e-06, "loss": 1.2846, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2261 }, { "epoch": 1.4190715181932245, "grad_norm": 1.5791975259780884, "learning_rate": 1.180362977345646e-06, "loss": 1.1872, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2262 }, { "epoch": 1.4196988707653702, "grad_norm": 1.7926671504974365, "learning_rate": 1.1780394971535516e-06, "loss": 1.3442, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2263 }, { "epoch": 1.4203262233375158, "grad_norm": 1.82709801197052, "learning_rate": 1.1757176009585795e-06, "loss": 1.0753, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2264 }, { "epoch": 1.4209535759096612, "grad_norm": 1.4206048250198364, "learning_rate": 1.1733972915428665e-06, "loss": 1.1006, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2265 }, { "epoch": 1.4215809284818068, "grad_norm": 1.908329963684082, "learning_rate": 1.171078571686652e-06, "loss": 1.2698, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2266 }, { "epoch": 1.4222082810539523, "grad_norm": 2.473623752593994, "learning_rate": 1.1687614441682667e-06, "loss": 1.4153, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2267 }, { "epoch": 1.4228356336260979, "grad_norm": 2.12664532661438, "learning_rate": 1.1664459117641344e-06, "loss": 1.2839, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2268 }, { "epoch": 1.4234629861982433, "grad_norm": 1.7684519290924072, "learning_rate": 1.16413197724877e-06, "loss": 1.1967, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2269 }, { "epoch": 1.424090338770389, "grad_norm": 1.3864423036575317, "learning_rate": 1.16181964339477e-06, "loss": 1.1686, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2270 }, { "epoch": 1.4247176913425346, "grad_norm": 1.5991979837417603, "learning_rate": 1.1595089129728165e-06, "loss": 1.3387, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2271 }, { "epoch": 1.42534504391468, "grad_norm": 1.8228894472122192, "learning_rate": 1.1571997887516672e-06, "loss": 1.1316, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2272 }, { "epoch": 1.4259723964868256, "grad_norm": 1.7148045301437378, "learning_rate": 1.154892273498159e-06, "loss": 1.2169, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2273 }, { "epoch": 1.4265997490589712, "grad_norm": 1.4883177280426025, "learning_rate": 1.1525863699771967e-06, "loss": 1.3343, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2274 }, { "epoch": 1.4272271016311167, "grad_norm": 1.6968177556991577, "learning_rate": 1.1502820809517557e-06, "loss": 1.4402, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2275 }, { "epoch": 1.4278544542032623, "grad_norm": 1.5625579357147217, "learning_rate": 1.1479794091828787e-06, "loss": 1.2919, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2276 }, { "epoch": 1.4284818067754077, "grad_norm": 1.4449665546417236, "learning_rate": 1.1456783574296668e-06, "loss": 1.2676, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2277 }, { "epoch": 1.4291091593475533, "grad_norm": 1.6299374103546143, "learning_rate": 1.1433789284492842e-06, "loss": 1.1585, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2278 }, { "epoch": 1.4297365119196987, "grad_norm": 1.7700270414352417, "learning_rate": 1.1410811249969475e-06, "loss": 1.1956, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2279 }, { "epoch": 1.4303638644918444, "grad_norm": 2.047250509262085, "learning_rate": 1.1387849498259265e-06, "loss": 1.2824, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2280 }, { "epoch": 1.43099121706399, "grad_norm": 1.733454704284668, "learning_rate": 1.1364904056875395e-06, "loss": 1.2353, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2281 }, { "epoch": 1.4316185696361354, "grad_norm": 2.0308773517608643, "learning_rate": 1.1341974953311524e-06, "loss": 1.2762, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2282 }, { "epoch": 1.432245922208281, "grad_norm": 1.8377238512039185, "learning_rate": 1.1319062215041715e-06, "loss": 1.2808, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2283 }, { "epoch": 1.4328732747804267, "grad_norm": 1.727290153503418, "learning_rate": 1.129616586952042e-06, "loss": 1.1955, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2284 }, { "epoch": 1.433500627352572, "grad_norm": 1.7196539640426636, "learning_rate": 1.1273285944182468e-06, "loss": 1.1532, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2285 }, { "epoch": 1.4341279799247177, "grad_norm": 2.2919390201568604, "learning_rate": 1.1250422466442992e-06, "loss": 1.2986, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2286 }, { "epoch": 1.4347553324968632, "grad_norm": 1.6838818788528442, "learning_rate": 1.122757546369744e-06, "loss": 1.3198, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2287 }, { "epoch": 1.4353826850690088, "grad_norm": 1.4195574522018433, "learning_rate": 1.12047449633215e-06, "loss": 1.1265, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2288 }, { "epoch": 1.4360100376411542, "grad_norm": 1.9881501197814941, "learning_rate": 1.1181930992671078e-06, "loss": 1.2263, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2289 }, { "epoch": 1.4366373902132998, "grad_norm": 1.729966640472412, "learning_rate": 1.1159133579082315e-06, "loss": 1.1496, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2290 }, { "epoch": 1.4372647427854455, "grad_norm": 1.7988170385360718, "learning_rate": 1.1136352749871462e-06, "loss": 1.2376, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2291 }, { "epoch": 1.4378920953575909, "grad_norm": 1.5249148607254028, "learning_rate": 1.1113588532334946e-06, "loss": 1.1522, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2292 }, { "epoch": 1.4385194479297365, "grad_norm": 1.36632239818573, "learning_rate": 1.1090840953749253e-06, "loss": 1.3422, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2293 }, { "epoch": 1.4391468005018822, "grad_norm": 1.8093537092208862, "learning_rate": 1.1068110041370938e-06, "loss": 1.2057, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2294 }, { "epoch": 1.4397741530740276, "grad_norm": 2.2041056156158447, "learning_rate": 1.1045395822436617e-06, "loss": 1.3304, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2295 }, { "epoch": 1.4404015056461732, "grad_norm": 1.9485527276992798, "learning_rate": 1.102269832416286e-06, "loss": 1.2654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2296 }, { "epoch": 1.4410288582183188, "grad_norm": 1.7615538835525513, "learning_rate": 1.1000017573746225e-06, "loss": 1.173, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2297 }, { "epoch": 1.4416562107904642, "grad_norm": 1.8701543807983398, "learning_rate": 1.097735359836321e-06, "loss": 1.221, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2298 }, { "epoch": 1.4422835633626097, "grad_norm": 1.8322738409042358, "learning_rate": 1.0954706425170198e-06, "loss": 1.3195, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2299 }, { "epoch": 1.4429109159347553, "grad_norm": 1.7369928359985352, "learning_rate": 1.0932076081303442e-06, "loss": 1.3012, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2300 }, { "epoch": 1.443538268506901, "grad_norm": 1.7098748683929443, "learning_rate": 1.0909462593879021e-06, "loss": 1.1589, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2301 }, { "epoch": 1.4441656210790463, "grad_norm": 1.913375735282898, "learning_rate": 1.0886865989992847e-06, "loss": 1.3016, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2302 }, { "epoch": 1.444792973651192, "grad_norm": 2.0486834049224854, "learning_rate": 1.0864286296720566e-06, "loss": 1.1933, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2303 }, { "epoch": 1.4454203262233376, "grad_norm": 2.107297420501709, "learning_rate": 1.0841723541117594e-06, "loss": 1.1922, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2304 }, { "epoch": 1.446047678795483, "grad_norm": 1.906996726989746, "learning_rate": 1.0819177750219013e-06, "loss": 1.2011, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2305 }, { "epoch": 1.4466750313676286, "grad_norm": 1.8115808963775635, "learning_rate": 1.0796648951039621e-06, "loss": 1.2571, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2306 }, { "epoch": 1.4473023839397743, "grad_norm": 1.6775972843170166, "learning_rate": 1.0774137170573826e-06, "loss": 1.0947, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2307 }, { "epoch": 1.4479297365119197, "grad_norm": 1.692162036895752, "learning_rate": 1.0751642435795642e-06, "loss": 1.2644, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2308 }, { "epoch": 1.448557089084065, "grad_norm": 1.9846559762954712, "learning_rate": 1.0729164773658692e-06, "loss": 1.2951, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2309 }, { "epoch": 1.4491844416562107, "grad_norm": 1.7943240404129028, "learning_rate": 1.0706704211096098e-06, "loss": 1.2268, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2310 }, { "epoch": 1.4498117942283564, "grad_norm": 1.8496971130371094, "learning_rate": 1.0684260775020532e-06, "loss": 1.2008, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2311 }, { "epoch": 1.4504391468005018, "grad_norm": 1.930126667022705, "learning_rate": 1.0661834492324121e-06, "loss": 1.2997, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2312 }, { "epoch": 1.4510664993726474, "grad_norm": 1.8001741170883179, "learning_rate": 1.0639425389878435e-06, "loss": 1.2232, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2313 }, { "epoch": 1.451693851944793, "grad_norm": 1.777064561843872, "learning_rate": 1.0617033494534486e-06, "loss": 1.1836, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2314 }, { "epoch": 1.4523212045169385, "grad_norm": 1.6934930086135864, "learning_rate": 1.0594658833122642e-06, "loss": 1.2539, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2315 }, { "epoch": 1.452948557089084, "grad_norm": 2.0415735244750977, "learning_rate": 1.0572301432452626e-06, "loss": 1.2492, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2316 }, { "epoch": 1.4535759096612297, "grad_norm": 1.4712687730789185, "learning_rate": 1.0549961319313476e-06, "loss": 1.145, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2317 }, { "epoch": 1.4542032622333751, "grad_norm": 1.9153761863708496, "learning_rate": 1.052763852047354e-06, "loss": 1.1663, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2318 }, { "epoch": 1.4548306148055208, "grad_norm": 1.9745244979858398, "learning_rate": 1.0505333062680383e-06, "loss": 1.2157, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2319 }, { "epoch": 1.4554579673776662, "grad_norm": 1.7918163537979126, "learning_rate": 1.048304497266083e-06, "loss": 1.151, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2320 }, { "epoch": 1.4560853199498118, "grad_norm": 1.8916409015655518, "learning_rate": 1.0460774277120866e-06, "loss": 1.1889, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2321 }, { "epoch": 1.4567126725219572, "grad_norm": 1.5514240264892578, "learning_rate": 1.0438521002745634e-06, "loss": 1.1767, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2322 }, { "epoch": 1.4573400250941029, "grad_norm": 1.4099035263061523, "learning_rate": 1.0416285176199425e-06, "loss": 1.2045, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2323 }, { "epoch": 1.4579673776662485, "grad_norm": 1.954008936882019, "learning_rate": 1.0394066824125604e-06, "loss": 1.2397, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2324 }, { "epoch": 1.458594730238394, "grad_norm": 1.4865626096725464, "learning_rate": 1.0371865973146613e-06, "loss": 1.1996, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2325 }, { "epoch": 1.4592220828105396, "grad_norm": 1.4902006387710571, "learning_rate": 1.03496826498639e-06, "loss": 1.1556, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2326 }, { "epoch": 1.4598494353826852, "grad_norm": 2.036508083343506, "learning_rate": 1.0327516880857949e-06, "loss": 1.3385, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2327 }, { "epoch": 1.4604767879548306, "grad_norm": 1.7831565141677856, "learning_rate": 1.0305368692688175e-06, "loss": 1.1753, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2328 }, { "epoch": 1.4611041405269762, "grad_norm": 1.7710812091827393, "learning_rate": 1.0283238111892929e-06, "loss": 1.202, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2329 }, { "epoch": 1.4617314930991216, "grad_norm": 1.7439368963241577, "learning_rate": 1.0261125164989497e-06, "loss": 1.3092, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2330 }, { "epoch": 1.4623588456712673, "grad_norm": 2.3144192695617676, "learning_rate": 1.0239029878474001e-06, "loss": 1.1785, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2331 }, { "epoch": 1.4629861982434127, "grad_norm": 1.8720273971557617, "learning_rate": 1.0216952278821423e-06, "loss": 1.2678, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2332 }, { "epoch": 1.4636135508155583, "grad_norm": 1.4683690071105957, "learning_rate": 1.019489239248553e-06, "loss": 1.0986, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2333 }, { "epoch": 1.464240903387704, "grad_norm": 1.8687940835952759, "learning_rate": 1.0172850245898893e-06, "loss": 1.213, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2334 }, { "epoch": 1.4648682559598494, "grad_norm": 1.6996122598648071, "learning_rate": 1.0150825865472813e-06, "loss": 1.1504, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2335 }, { "epoch": 1.465495608531995, "grad_norm": 1.8337829113006592, "learning_rate": 1.0128819277597287e-06, "loss": 1.2712, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2336 }, { "epoch": 1.4661229611041406, "grad_norm": 1.7464158535003662, "learning_rate": 1.0106830508641032e-06, "loss": 1.0795, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2337 }, { "epoch": 1.466750313676286, "grad_norm": 1.7541254758834839, "learning_rate": 1.008485958495137e-06, "loss": 1.3494, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2338 }, { "epoch": 1.4673776662484317, "grad_norm": 1.6948381662368774, "learning_rate": 1.0062906532854284e-06, "loss": 1.2058, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2339 }, { "epoch": 1.468005018820577, "grad_norm": 1.8352493047714233, "learning_rate": 1.0040971378654294e-06, "loss": 1.3595, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2340 }, { "epoch": 1.4686323713927227, "grad_norm": 1.7481765747070312, "learning_rate": 1.0019054148634525e-06, "loss": 1.3388, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2341 }, { "epoch": 1.4692597239648681, "grad_norm": 2.0751793384552, "learning_rate": 9.997154869056588e-07, "loss": 1.205, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2342 }, { "epoch": 1.4698870765370138, "grad_norm": 6.425692558288574, "learning_rate": 9.975273566160592e-07, "loss": 1.2013, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2343 }, { "epoch": 1.4705144291091594, "grad_norm": 1.7943389415740967, "learning_rate": 9.953410266165131e-07, "loss": 1.1807, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2344 }, { "epoch": 1.4711417816813048, "grad_norm": 1.886940836906433, "learning_rate": 9.93156499526719e-07, "loss": 1.0951, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2345 }, { "epoch": 1.4717691342534505, "grad_norm": 1.7454280853271484, "learning_rate": 9.909737779642187e-07, "loss": 1.0865, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2346 }, { "epoch": 1.472396486825596, "grad_norm": 1.8696519136428833, "learning_rate": 9.887928645443878e-07, "loss": 1.2695, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2347 }, { "epoch": 1.4730238393977415, "grad_norm": 1.8949179649353027, "learning_rate": 9.866137618804366e-07, "loss": 1.2531, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2348 }, { "epoch": 1.4736511919698871, "grad_norm": 1.9233238697052002, "learning_rate": 9.844364725834058e-07, "loss": 1.3932, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2349 }, { "epoch": 1.4742785445420326, "grad_norm": 1.780385136604309, "learning_rate": 9.822609992621618e-07, "loss": 1.2219, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2350 }, { "epoch": 1.4749058971141782, "grad_norm": 2.673675775527954, "learning_rate": 9.800873445233984e-07, "loss": 1.1437, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2351 }, { "epoch": 1.4755332496863236, "grad_norm": 1.3347368240356445, "learning_rate": 9.77915510971626e-07, "loss": 1.1426, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2352 }, { "epoch": 1.4761606022584692, "grad_norm": 1.907494306564331, "learning_rate": 9.757455012091774e-07, "loss": 1.2547, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2353 }, { "epoch": 1.4767879548306149, "grad_norm": 1.7126989364624023, "learning_rate": 9.735773178361965e-07, "loss": 1.3163, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2354 }, { "epoch": 1.4774153074027603, "grad_norm": 2.050947904586792, "learning_rate": 9.714109634506393e-07, "loss": 1.0594, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2355 }, { "epoch": 1.478042659974906, "grad_norm": 1.7997150421142578, "learning_rate": 9.692464406482727e-07, "loss": 1.2088, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2356 }, { "epoch": 1.4786700125470515, "grad_norm": 1.9468494653701782, "learning_rate": 9.67083752022666e-07, "loss": 1.2096, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2357 }, { "epoch": 1.479297365119197, "grad_norm": 1.8462549448013306, "learning_rate": 9.64922900165193e-07, "loss": 1.143, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2358 }, { "epoch": 1.4799247176913426, "grad_norm": 1.697049617767334, "learning_rate": 9.627638876650245e-07, "loss": 1.3182, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2359 }, { "epoch": 1.480552070263488, "grad_norm": 1.7648268938064575, "learning_rate": 9.606067171091301e-07, "loss": 1.0697, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2360 }, { "epoch": 1.4811794228356336, "grad_norm": 1.8276077508926392, "learning_rate": 9.584513910822704e-07, "loss": 1.3043, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2361 }, { "epoch": 1.481806775407779, "grad_norm": 1.9977463483810425, "learning_rate": 9.562979121669946e-07, "loss": 1.3174, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2362 }, { "epoch": 1.4824341279799247, "grad_norm": 1.7347925901412964, "learning_rate": 9.541462829436426e-07, "loss": 1.2394, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2363 }, { "epoch": 1.4830614805520703, "grad_norm": 1.7249211072921753, "learning_rate": 9.519965059903349e-07, "loss": 1.1495, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2364 }, { "epoch": 1.4836888331242157, "grad_norm": 1.6533867120742798, "learning_rate": 9.498485838829727e-07, "loss": 1.3614, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2365 }, { "epoch": 1.4843161856963614, "grad_norm": 1.8468972444534302, "learning_rate": 9.477025191952352e-07, "loss": 1.36, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2366 }, { "epoch": 1.484943538268507, "grad_norm": 2.0067813396453857, "learning_rate": 9.455583144985772e-07, "loss": 1.2394, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2367 }, { "epoch": 1.4855708908406524, "grad_norm": 1.959493637084961, "learning_rate": 9.434159723622235e-07, "loss": 1.2166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2368 }, { "epoch": 1.486198243412798, "grad_norm": 1.8856251239776611, "learning_rate": 9.412754953531664e-07, "loss": 1.2236, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2369 }, { "epoch": 1.4868255959849435, "grad_norm": 1.546231985092163, "learning_rate": 9.39136886036166e-07, "loss": 1.1945, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2370 }, { "epoch": 1.487452948557089, "grad_norm": 1.9689160585403442, "learning_rate": 9.370001469737411e-07, "loss": 1.2117, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2371 }, { "epoch": 1.4880803011292345, "grad_norm": 1.8257304430007935, "learning_rate": 9.348652807261732e-07, "loss": 1.2046, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2372 }, { "epoch": 1.4887076537013801, "grad_norm": 1.7765759229660034, "learning_rate": 9.32732289851496e-07, "loss": 1.1348, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2373 }, { "epoch": 1.4893350062735258, "grad_norm": 1.97781240940094, "learning_rate": 9.306011769054999e-07, "loss": 1.1807, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2374 }, { "epoch": 1.4899623588456712, "grad_norm": 1.512803316116333, "learning_rate": 9.28471944441722e-07, "loss": 1.1873, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2375 }, { "epoch": 1.4905897114178168, "grad_norm": 1.8602982759475708, "learning_rate": 9.263445950114469e-07, "loss": 1.2918, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2376 }, { "epoch": 1.4912170639899625, "grad_norm": 1.9396471977233887, "learning_rate": 9.24219131163705e-07, "loss": 1.3401, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2377 }, { "epoch": 1.4918444165621079, "grad_norm": 1.779157280921936, "learning_rate": 9.220955554452637e-07, "loss": 1.1911, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2378 }, { "epoch": 1.4924717691342535, "grad_norm": 1.6677671670913696, "learning_rate": 9.199738704006322e-07, "loss": 1.2692, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2379 }, { "epoch": 1.4930991217063991, "grad_norm": 1.3902426958084106, "learning_rate": 9.17854078572051e-07, "loss": 1.2505, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2380 }, { "epoch": 1.4937264742785445, "grad_norm": 1.772935152053833, "learning_rate": 9.157361824994937e-07, "loss": 1.2273, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2381 }, { "epoch": 1.49435382685069, "grad_norm": 1.8907947540283203, "learning_rate": 9.13620184720661e-07, "loss": 1.2794, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2382 }, { "epoch": 1.4949811794228356, "grad_norm": 1.882448434829712, "learning_rate": 9.1150608777098e-07, "loss": 1.1436, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2383 }, { "epoch": 1.4956085319949812, "grad_norm": 2.5027365684509277, "learning_rate": 9.093938941836012e-07, "loss": 1.1959, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2384 }, { "epoch": 1.4962358845671266, "grad_norm": 1.830198049545288, "learning_rate": 9.072836064893917e-07, "loss": 1.327, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2385 }, { "epoch": 1.4968632371392723, "grad_norm": 1.606887698173523, "learning_rate": 9.051752272169386e-07, "loss": 1.2297, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2386 }, { "epoch": 1.497490589711418, "grad_norm": 1.3970919847488403, "learning_rate": 9.030687588925388e-07, "loss": 1.1981, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2387 }, { "epoch": 1.4981179422835633, "grad_norm": 2.031083822250366, "learning_rate": 9.009642040402022e-07, "loss": 1.2114, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2388 }, { "epoch": 1.498745294855709, "grad_norm": 1.7160199880599976, "learning_rate": 8.98861565181644e-07, "loss": 1.3597, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2389 }, { "epoch": 1.4993726474278546, "grad_norm": 1.7950799465179443, "learning_rate": 8.967608448362836e-07, "loss": 1.1441, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2390 }, { "epoch": 1.5, "grad_norm": 1.7221378087997437, "learning_rate": 8.946620455212438e-07, "loss": 1.2303, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2391 }, { "epoch": 1.5006273525721454, "grad_norm": 1.899416208267212, "learning_rate": 8.925651697513424e-07, "loss": 1.3782, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2392 }, { "epoch": 1.501254705144291, "grad_norm": 1.5485550165176392, "learning_rate": 8.90470220039096e-07, "loss": 1.1964, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2393 }, { "epoch": 1.5018820577164367, "grad_norm": 1.6856316328048706, "learning_rate": 8.883771988947099e-07, "loss": 1.3119, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2394 }, { "epoch": 1.502509410288582, "grad_norm": 1.5902036428451538, "learning_rate": 8.862861088260816e-07, "loss": 1.1776, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2395 }, { "epoch": 1.5031367628607277, "grad_norm": 2.432138442993164, "learning_rate": 8.841969523387922e-07, "loss": 1.4684, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2396 }, { "epoch": 1.5037641154328734, "grad_norm": 1.3001375198364258, "learning_rate": 8.821097319361074e-07, "loss": 1.0717, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2397 }, { "epoch": 1.5043914680050188, "grad_norm": 1.747187614440918, "learning_rate": 8.800244501189722e-07, "loss": 1.3016, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2398 }, { "epoch": 1.5050188205771644, "grad_norm": 1.7088016271591187, "learning_rate": 8.77941109386009e-07, "loss": 1.2023, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2399 }, { "epoch": 1.50564617314931, "grad_norm": 1.6368725299835205, "learning_rate": 8.758597122335158e-07, "loss": 1.1448, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2400 }, { "epoch": 1.5062735257214555, "grad_norm": 1.5822457075119019, "learning_rate": 8.737802611554596e-07, "loss": 1.1753, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2401 }, { "epoch": 1.5069008782936009, "grad_norm": 1.209599256515503, "learning_rate": 8.717027586434759e-07, "loss": 1.1381, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2402 }, { "epoch": 1.5075282308657465, "grad_norm": 1.73714017868042, "learning_rate": 8.696272071868678e-07, "loss": 1.1301, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2403 }, { "epoch": 1.5081555834378921, "grad_norm": 2.023517370223999, "learning_rate": 8.675536092725967e-07, "loss": 1.2922, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2404 }, { "epoch": 1.5087829360100375, "grad_norm": 1.518594741821289, "learning_rate": 8.654819673852874e-07, "loss": 1.2086, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2405 }, { "epoch": 1.5094102885821832, "grad_norm": 2.004223108291626, "learning_rate": 8.634122840072171e-07, "loss": 1.3216, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2406 }, { "epoch": 1.5100376411543288, "grad_norm": 1.823880672454834, "learning_rate": 8.613445616183197e-07, "loss": 1.3131, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2407 }, { "epoch": 1.5106649937264742, "grad_norm": 1.5020653009414673, "learning_rate": 8.592788026961769e-07, "loss": 1.2801, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2408 }, { "epoch": 1.5112923462986199, "grad_norm": 1.8484547138214111, "learning_rate": 8.572150097160179e-07, "loss": 1.1977, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2409 }, { "epoch": 1.5119196988707655, "grad_norm": 1.4522210359573364, "learning_rate": 8.551531851507186e-07, "loss": 1.1126, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2410 }, { "epoch": 1.512547051442911, "grad_norm": 1.6940292119979858, "learning_rate": 8.530933314707932e-07, "loss": 1.1495, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2411 }, { "epoch": 1.5131744040150563, "grad_norm": 2.1096079349517822, "learning_rate": 8.510354511443975e-07, "loss": 1.3074, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2412 }, { "epoch": 1.513801756587202, "grad_norm": 1.2838313579559326, "learning_rate": 8.489795466373199e-07, "loss": 1.2711, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2413 }, { "epoch": 1.5144291091593476, "grad_norm": 1.7509784698486328, "learning_rate": 8.469256204129827e-07, "loss": 1.3424, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2414 }, { "epoch": 1.515056461731493, "grad_norm": 1.9112739562988281, "learning_rate": 8.448736749324382e-07, "loss": 1.2579, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2415 }, { "epoch": 1.5156838143036386, "grad_norm": 1.7965364456176758, "learning_rate": 8.428237126543634e-07, "loss": 1.2314, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2416 }, { "epoch": 1.5163111668757843, "grad_norm": 1.9476574659347534, "learning_rate": 8.407757360350624e-07, "loss": 1.32, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2417 }, { "epoch": 1.5169385194479297, "grad_norm": 1.7615290880203247, "learning_rate": 8.387297475284562e-07, "loss": 1.2656, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2418 }, { "epoch": 1.5175658720200753, "grad_norm": 2.400646686553955, "learning_rate": 8.366857495860869e-07, "loss": 1.2182, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2419 }, { "epoch": 1.518193224592221, "grad_norm": 2.004615306854248, "learning_rate": 8.34643744657109e-07, "loss": 1.1982, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2420 }, { "epoch": 1.5188205771643664, "grad_norm": 1.8327479362487793, "learning_rate": 8.32603735188291e-07, "loss": 1.2677, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2421 }, { "epoch": 1.5194479297365118, "grad_norm": 1.8293399810791016, "learning_rate": 8.305657236240086e-07, "loss": 1.2551, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2422 }, { "epoch": 1.5200752823086576, "grad_norm": 1.6446075439453125, "learning_rate": 8.285297124062439e-07, "loss": 1.314, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2423 }, { "epoch": 1.520702634880803, "grad_norm": 1.8601378202438354, "learning_rate": 8.264957039745835e-07, "loss": 1.2594, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2424 }, { "epoch": 1.5213299874529485, "grad_norm": 1.7208938598632812, "learning_rate": 8.244637007662126e-07, "loss": 1.2329, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2425 }, { "epoch": 1.521957340025094, "grad_norm": 1.8246501684188843, "learning_rate": 8.224337052159154e-07, "loss": 1.1892, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2426 }, { "epoch": 1.5225846925972397, "grad_norm": 1.7943869829177856, "learning_rate": 8.204057197560681e-07, "loss": 1.1979, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2427 }, { "epoch": 1.5232120451693851, "grad_norm": 1.817122220993042, "learning_rate": 8.18379746816641e-07, "loss": 1.3934, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2428 }, { "epoch": 1.5238393977415308, "grad_norm": 1.78376305103302, "learning_rate": 8.163557888251916e-07, "loss": 1.3509, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2429 }, { "epoch": 1.5244667503136764, "grad_norm": 2.2729251384735107, "learning_rate": 8.143338482068628e-07, "loss": 1.1322, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2430 }, { "epoch": 1.5250941028858218, "grad_norm": 2.0838749408721924, "learning_rate": 8.123139273843802e-07, "loss": 1.1556, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2431 }, { "epoch": 1.5257214554579672, "grad_norm": 1.7555264234542847, "learning_rate": 8.102960287780498e-07, "loss": 1.216, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2432 }, { "epoch": 1.526348808030113, "grad_norm": 3.866431474685669, "learning_rate": 8.082801548057553e-07, "loss": 1.2201, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2433 }, { "epoch": 1.5269761606022585, "grad_norm": 1.5737709999084473, "learning_rate": 8.062663078829524e-07, "loss": 1.2154, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2434 }, { "epoch": 1.527603513174404, "grad_norm": 1.448691964149475, "learning_rate": 8.042544904226704e-07, "loss": 1.2301, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2435 }, { "epoch": 1.5282308657465495, "grad_norm": 2.6556127071380615, "learning_rate": 8.02244704835505e-07, "loss": 1.2506, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2436 }, { "epoch": 1.5288582183186952, "grad_norm": 1.8680992126464844, "learning_rate": 8.002369535296168e-07, "loss": 1.2128, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2437 }, { "epoch": 1.5294855708908406, "grad_norm": 1.5463523864746094, "learning_rate": 7.982312389107319e-07, "loss": 1.1466, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2438 }, { "epoch": 1.5301129234629862, "grad_norm": 1.889672040939331, "learning_rate": 7.962275633821321e-07, "loss": 1.3728, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2439 }, { "epoch": 1.5307402760351319, "grad_norm": 1.8735464811325073, "learning_rate": 7.942259293446594e-07, "loss": 1.2968, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2440 }, { "epoch": 1.5313676286072773, "grad_norm": 1.7368452548980713, "learning_rate": 7.922263391967069e-07, "loss": 1.2953, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2441 }, { "epoch": 1.5319949811794227, "grad_norm": 1.8116888999938965, "learning_rate": 7.902287953342213e-07, "loss": 1.2372, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2442 }, { "epoch": 1.5326223337515685, "grad_norm": 1.717750906944275, "learning_rate": 7.882333001506953e-07, "loss": 1.1085, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2443 }, { "epoch": 1.533249686323714, "grad_norm": 2.0107264518737793, "learning_rate": 7.862398560371665e-07, "loss": 1.1305, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2444 }, { "epoch": 1.5338770388958594, "grad_norm": 1.9042125940322876, "learning_rate": 7.842484653822177e-07, "loss": 1.2038, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2445 }, { "epoch": 1.534504391468005, "grad_norm": 1.7293494939804077, "learning_rate": 7.822591305719684e-07, "loss": 1.1182, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2446 }, { "epoch": 1.5351317440401506, "grad_norm": 1.9139995574951172, "learning_rate": 7.802718539900761e-07, "loss": 1.2507, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2447 }, { "epoch": 1.535759096612296, "grad_norm": 1.5856516361236572, "learning_rate": 7.782866380177307e-07, "loss": 1.285, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2448 }, { "epoch": 1.5363864491844417, "grad_norm": 1.739535927772522, "learning_rate": 7.763034850336554e-07, "loss": 1.2812, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2449 }, { "epoch": 1.5370138017565873, "grad_norm": 1.9271031618118286, "learning_rate": 7.743223974141001e-07, "loss": 1.1226, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2450 }, { "epoch": 1.5376411543287327, "grad_norm": 1.4703642129898071, "learning_rate": 7.723433775328385e-07, "loss": 1.1203, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2451 }, { "epoch": 1.5382685069008781, "grad_norm": 1.5959513187408447, "learning_rate": 7.703664277611702e-07, "loss": 1.381, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2452 }, { "epoch": 1.538895859473024, "grad_norm": 1.8221240043640137, "learning_rate": 7.683915504679109e-07, "loss": 1.2892, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2453 }, { "epoch": 1.5395232120451694, "grad_norm": 1.7736937999725342, "learning_rate": 7.66418748019396e-07, "loss": 1.0808, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2454 }, { "epoch": 1.5401505646173148, "grad_norm": 1.6039260625839233, "learning_rate": 7.644480227794723e-07, "loss": 1.2808, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2455 }, { "epoch": 1.5407779171894604, "grad_norm": 1.7460005283355713, "learning_rate": 7.624793771094979e-07, "loss": 1.1979, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2456 }, { "epoch": 1.541405269761606, "grad_norm": 1.8570126295089722, "learning_rate": 7.605128133683414e-07, "loss": 1.104, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2457 }, { "epoch": 1.5420326223337515, "grad_norm": 2.0892014503479004, "learning_rate": 7.585483339123739e-07, "loss": 1.3555, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2458 }, { "epoch": 1.5426599749058971, "grad_norm": 1.9000020027160645, "learning_rate": 7.565859410954718e-07, "loss": 1.401, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2459 }, { "epoch": 1.5432873274780428, "grad_norm": 2.0705411434173584, "learning_rate": 7.546256372690086e-07, "loss": 1.2129, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2460 }, { "epoch": 1.5439146800501882, "grad_norm": 1.8334147930145264, "learning_rate": 7.526674247818569e-07, "loss": 1.3082, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2461 }, { "epoch": 1.5445420326223338, "grad_norm": 1.836845874786377, "learning_rate": 7.507113059803825e-07, "loss": 1.2692, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2462 }, { "epoch": 1.5451693851944794, "grad_norm": 1.8347852230072021, "learning_rate": 7.48757283208442e-07, "loss": 1.1857, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2463 }, { "epoch": 1.5457967377666249, "grad_norm": 1.5334773063659668, "learning_rate": 7.468053588073803e-07, "loss": 1.1636, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2464 }, { "epoch": 1.5464240903387703, "grad_norm": 1.8923527002334595, "learning_rate": 7.448555351160308e-07, "loss": 1.3495, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2465 }, { "epoch": 1.547051442910916, "grad_norm": 2.0651772022247314, "learning_rate": 7.429078144707064e-07, "loss": 1.145, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2466 }, { "epoch": 1.5476787954830615, "grad_norm": 1.8705108165740967, "learning_rate": 7.409621992052007e-07, "loss": 1.207, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2467 }, { "epoch": 1.548306148055207, "grad_norm": 1.8827093839645386, "learning_rate": 7.390186916507869e-07, "loss": 1.2828, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2468 }, { "epoch": 1.5489335006273526, "grad_norm": 1.8180711269378662, "learning_rate": 7.370772941362106e-07, "loss": 1.3038, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2469 }, { "epoch": 1.5495608531994982, "grad_norm": 1.8148471117019653, "learning_rate": 7.351380089876886e-07, "loss": 1.1912, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2470 }, { "epoch": 1.5501882057716436, "grad_norm": 1.8098262548446655, "learning_rate": 7.332008385289091e-07, "loss": 1.1659, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2471 }, { "epoch": 1.5508155583437893, "grad_norm": 1.2533210515975952, "learning_rate": 7.312657850810234e-07, "loss": 1.0858, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2472 }, { "epoch": 1.551442910915935, "grad_norm": 1.871936559677124, "learning_rate": 7.293328509626497e-07, "loss": 1.2199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2473 }, { "epoch": 1.5520702634880803, "grad_norm": 1.6073999404907227, "learning_rate": 7.274020384898628e-07, "loss": 1.2168, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2474 }, { "epoch": 1.5526976160602257, "grad_norm": 1.662189245223999, "learning_rate": 7.254733499761993e-07, "loss": 1.239, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2475 }, { "epoch": 1.5533249686323714, "grad_norm": 1.7758508920669556, "learning_rate": 7.235467877326476e-07, "loss": 1.3394, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2476 }, { "epoch": 1.553952321204517, "grad_norm": 1.8011035919189453, "learning_rate": 7.216223540676495e-07, "loss": 1.4568, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2477 }, { "epoch": 1.5545796737766624, "grad_norm": 5.772345542907715, "learning_rate": 7.197000512870974e-07, "loss": 1.3203, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2478 }, { "epoch": 1.555207026348808, "grad_norm": 1.6594387292861938, "learning_rate": 7.177798816943288e-07, "loss": 1.1403, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2479 }, { "epoch": 1.5558343789209537, "grad_norm": 2.6319494247436523, "learning_rate": 7.158618475901252e-07, "loss": 1.2109, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2480 }, { "epoch": 1.556461731493099, "grad_norm": 1.813653588294983, "learning_rate": 7.139459512727115e-07, "loss": 1.1186, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2481 }, { "epoch": 1.5570890840652447, "grad_norm": 1.7633485794067383, "learning_rate": 7.120321950377487e-07, "loss": 1.148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2482 }, { "epoch": 1.5577164366373903, "grad_norm": 1.852625846862793, "learning_rate": 7.101205811783343e-07, "loss": 1.1101, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2483 }, { "epoch": 1.5583437892095358, "grad_norm": 1.7904287576675415, "learning_rate": 7.08211111984998e-07, "loss": 1.2955, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2484 }, { "epoch": 1.5589711417816812, "grad_norm": 1.7286176681518555, "learning_rate": 7.063037897457023e-07, "loss": 1.2696, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2485 }, { "epoch": 1.5595984943538268, "grad_norm": 1.7110837697982788, "learning_rate": 7.043986167458336e-07, "loss": 1.2207, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2486 }, { "epoch": 1.5602258469259724, "grad_norm": 1.8859424591064453, "learning_rate": 7.024955952682064e-07, "loss": 1.1963, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2487 }, { "epoch": 1.5608531994981178, "grad_norm": 1.711631417274475, "learning_rate": 7.005947275930541e-07, "loss": 1.1367, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2488 }, { "epoch": 1.5614805520702635, "grad_norm": 1.6996835470199585, "learning_rate": 6.986960159980327e-07, "loss": 1.1912, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2489 }, { "epoch": 1.5621079046424091, "grad_norm": 1.7976754903793335, "learning_rate": 6.96799462758212e-07, "loss": 1.222, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2490 }, { "epoch": 1.5627352572145545, "grad_norm": 1.5648021697998047, "learning_rate": 6.949050701460758e-07, "loss": 1.09, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2491 }, { "epoch": 1.5633626097867002, "grad_norm": 1.8570146560668945, "learning_rate": 6.930128404315214e-07, "loss": 1.2234, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2492 }, { "epoch": 1.5639899623588458, "grad_norm": 1.8520839214324951, "learning_rate": 6.911227758818515e-07, "loss": 1.2482, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2493 }, { "epoch": 1.5646173149309912, "grad_norm": 2.30804181098938, "learning_rate": 6.89234878761777e-07, "loss": 1.2595, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2494 }, { "epoch": 1.5652446675031366, "grad_norm": 1.8586586713790894, "learning_rate": 6.873491513334099e-07, "loss": 1.304, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2495 }, { "epoch": 1.5658720200752823, "grad_norm": 1.7122609615325928, "learning_rate": 6.854655958562625e-07, "loss": 1.3498, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2496 }, { "epoch": 1.566499372647428, "grad_norm": 1.9733279943466187, "learning_rate": 6.835842145872468e-07, "loss": 1.2857, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2497 }, { "epoch": 1.5671267252195733, "grad_norm": 1.871863842010498, "learning_rate": 6.817050097806674e-07, "loss": 1.1074, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2498 }, { "epoch": 1.567754077791719, "grad_norm": 1.6728519201278687, "learning_rate": 6.798279836882211e-07, "loss": 1.2012, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2499 }, { "epoch": 1.5683814303638646, "grad_norm": 1.7090595960617065, "learning_rate": 6.779531385589944e-07, "loss": 1.1679, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2500 }, { "epoch": 1.56900878293601, "grad_norm": 1.8407670259475708, "learning_rate": 6.760804766394624e-07, "loss": 1.1853, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2501 }, { "epoch": 1.5696361355081556, "grad_norm": 1.6820379495620728, "learning_rate": 6.742100001734817e-07, "loss": 1.1855, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2502 }, { "epoch": 1.5702634880803013, "grad_norm": 1.8091737031936646, "learning_rate": 6.723417114022907e-07, "loss": 1.1984, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2503 }, { "epoch": 1.5708908406524467, "grad_norm": 1.671722173690796, "learning_rate": 6.704756125645082e-07, "loss": 1.3158, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2504 }, { "epoch": 1.571518193224592, "grad_norm": 1.5467654466629028, "learning_rate": 6.686117058961267e-07, "loss": 1.1931, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2505 }, { "epoch": 1.572145545796738, "grad_norm": 1.8012003898620605, "learning_rate": 6.667499936305142e-07, "loss": 1.202, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2506 }, { "epoch": 1.5727728983688833, "grad_norm": 1.6461151838302612, "learning_rate": 6.648904779984067e-07, "loss": 1.0794, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2507 }, { "epoch": 1.5734002509410288, "grad_norm": 1.5227487087249756, "learning_rate": 6.630331612279112e-07, "loss": 1.2715, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2508 }, { "epoch": 1.5740276035131744, "grad_norm": 1.8711963891983032, "learning_rate": 6.61178045544498e-07, "loss": 1.2422, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2509 }, { "epoch": 1.57465495608532, "grad_norm": 1.4674845933914185, "learning_rate": 6.593251331709993e-07, "loss": 1.181, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2510 }, { "epoch": 1.5752823086574654, "grad_norm": 1.8265960216522217, "learning_rate": 6.5747442632761e-07, "loss": 1.2212, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2511 }, { "epoch": 1.575909661229611, "grad_norm": 1.6680943965911865, "learning_rate": 6.556259272318797e-07, "loss": 1.1853, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2512 }, { "epoch": 1.5765370138017567, "grad_norm": 1.9921751022338867, "learning_rate": 6.537796380987144e-07, "loss": 1.1803, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2513 }, { "epoch": 1.5771643663739021, "grad_norm": 1.7228193283081055, "learning_rate": 6.51935561140371e-07, "loss": 1.3567, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2514 }, { "epoch": 1.5777917189460475, "grad_norm": 1.6541950702667236, "learning_rate": 6.500936985664561e-07, "loss": 1.2213, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2515 }, { "epoch": 1.5784190715181934, "grad_norm": 1.579052209854126, "learning_rate": 6.482540525839226e-07, "loss": 1.2772, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2516 }, { "epoch": 1.5790464240903388, "grad_norm": 1.8425257205963135, "learning_rate": 6.464166253970672e-07, "loss": 1.2691, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2517 }, { "epoch": 1.5796737766624842, "grad_norm": 1.6395012140274048, "learning_rate": 6.445814192075304e-07, "loss": 1.1531, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2518 }, { "epoch": 1.5803011292346298, "grad_norm": 1.2978256940841675, "learning_rate": 6.427484362142877e-07, "loss": 1.1268, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2519 }, { "epoch": 1.5809284818067755, "grad_norm": 1.783844232559204, "learning_rate": 6.409176786136542e-07, "loss": 1.1135, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2520 }, { "epoch": 1.5815558343789209, "grad_norm": 1.8337434530258179, "learning_rate": 6.390891485992757e-07, "loss": 1.2873, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2521 }, { "epoch": 1.5821831869510665, "grad_norm": 1.6792529821395874, "learning_rate": 6.372628483621315e-07, "loss": 1.1688, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2522 }, { "epoch": 1.5828105395232122, "grad_norm": 1.58247971534729, "learning_rate": 6.35438780090527e-07, "loss": 1.1782, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2523 }, { "epoch": 1.5834378920953576, "grad_norm": 1.7473936080932617, "learning_rate": 6.336169459700933e-07, "loss": 1.1864, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2524 }, { "epoch": 1.584065244667503, "grad_norm": 1.6084119081497192, "learning_rate": 6.317973481837866e-07, "loss": 1.2161, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2525 }, { "epoch": 1.5846925972396488, "grad_norm": 1.8634337186813354, "learning_rate": 6.299799889118808e-07, "loss": 1.1492, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2526 }, { "epoch": 1.5853199498117942, "grad_norm": 1.4772895574569702, "learning_rate": 6.281648703319698e-07, "loss": 1.1645, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2527 }, { "epoch": 1.5859473023839397, "grad_norm": 1.8228148221969604, "learning_rate": 6.263519946189608e-07, "loss": 1.2619, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2528 }, { "epoch": 1.5865746549560853, "grad_norm": 1.7683981657028198, "learning_rate": 6.245413639450757e-07, "loss": 1.1456, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2529 }, { "epoch": 1.587202007528231, "grad_norm": 1.4007647037506104, "learning_rate": 6.227329804798444e-07, "loss": 1.1303, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2530 }, { "epoch": 1.5878293601003763, "grad_norm": 1.8269098997116089, "learning_rate": 6.209268463901047e-07, "loss": 1.2663, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2531 }, { "epoch": 1.588456712672522, "grad_norm": 2.1203908920288086, "learning_rate": 6.191229638399995e-07, "loss": 1.1958, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2532 }, { "epoch": 1.5890840652446676, "grad_norm": 1.8962397575378418, "learning_rate": 6.17321334990973e-07, "loss": 1.3296, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2533 }, { "epoch": 1.589711417816813, "grad_norm": 1.7916409969329834, "learning_rate": 6.155219620017707e-07, "loss": 1.2352, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2534 }, { "epoch": 1.5903387703889584, "grad_norm": 1.7805792093276978, "learning_rate": 6.137248470284332e-07, "loss": 1.4115, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2535 }, { "epoch": 1.5909661229611043, "grad_norm": 1.6655528545379639, "learning_rate": 6.119299922242975e-07, "loss": 1.1256, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2536 }, { "epoch": 1.5915934755332497, "grad_norm": 2.02083420753479, "learning_rate": 6.101373997399907e-07, "loss": 1.3301, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2537 }, { "epoch": 1.5922208281053951, "grad_norm": 1.6873060464859009, "learning_rate": 6.083470717234285e-07, "loss": 1.2483, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2538 }, { "epoch": 1.5928481806775407, "grad_norm": 1.4721137285232544, "learning_rate": 6.065590103198166e-07, "loss": 1.2483, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2539 }, { "epoch": 1.5934755332496864, "grad_norm": 1.7473325729370117, "learning_rate": 6.047732176716409e-07, "loss": 1.2648, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2540 }, { "epoch": 1.5941028858218318, "grad_norm": 1.768115520477295, "learning_rate": 6.029896959186721e-07, "loss": 1.3562, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2541 }, { "epoch": 1.5947302383939774, "grad_norm": 1.7181570529937744, "learning_rate": 6.012084471979571e-07, "loss": 1.1847, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2542 }, { "epoch": 1.595357590966123, "grad_norm": 1.8423340320587158, "learning_rate": 5.994294736438219e-07, "loss": 1.2472, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2543 }, { "epoch": 1.5959849435382685, "grad_norm": 1.721067190170288, "learning_rate": 5.97652777387864e-07, "loss": 1.3293, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2544 }, { "epoch": 1.5966122961104139, "grad_norm": 1.8208516836166382, "learning_rate": 5.95878360558953e-07, "loss": 1.2584, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2545 }, { "epoch": 1.5972396486825597, "grad_norm": 1.639552354812622, "learning_rate": 5.941062252832283e-07, "loss": 1.1548, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2546 }, { "epoch": 1.5978670012547052, "grad_norm": 1.8412017822265625, "learning_rate": 5.923363736840946e-07, "loss": 1.2064, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2547 }, { "epoch": 1.5984943538268506, "grad_norm": 1.747814416885376, "learning_rate": 5.905688078822192e-07, "loss": 1.3132, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2548 }, { "epoch": 1.5991217063989962, "grad_norm": 1.5099161863327026, "learning_rate": 5.888035299955325e-07, "loss": 1.0845, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2549 }, { "epoch": 1.5997490589711418, "grad_norm": 1.4273351430892944, "learning_rate": 5.870405421392217e-07, "loss": 1.2287, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2550 }, { "epoch": 1.6003764115432872, "grad_norm": 1.7674329280853271, "learning_rate": 5.852798464257323e-07, "loss": 1.3752, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2551 }, { "epoch": 1.6010037641154329, "grad_norm": 1.6482888460159302, "learning_rate": 5.835214449647602e-07, "loss": 1.405, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2552 }, { "epoch": 1.6016311166875785, "grad_norm": 1.7106801271438599, "learning_rate": 5.817653398632559e-07, "loss": 1.1694, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2553 }, { "epoch": 1.602258469259724, "grad_norm": 1.8246630430221558, "learning_rate": 5.800115332254144e-07, "loss": 1.1745, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2554 }, { "epoch": 1.6028858218318696, "grad_norm": 1.8861138820648193, "learning_rate": 5.782600271526812e-07, "loss": 1.2363, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2555 }, { "epoch": 1.6035131744040152, "grad_norm": 1.6869688034057617, "learning_rate": 5.765108237437409e-07, "loss": 1.0971, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2556 }, { "epoch": 1.6041405269761606, "grad_norm": 1.5465879440307617, "learning_rate": 5.747639250945208e-07, "loss": 1.1556, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2557 }, { "epoch": 1.604767879548306, "grad_norm": 1.6890803575515747, "learning_rate": 5.730193332981879e-07, "loss": 1.1229, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2558 }, { "epoch": 1.6053952321204517, "grad_norm": 1.8298060894012451, "learning_rate": 5.712770504451426e-07, "loss": 1.2463, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2559 }, { "epoch": 1.6060225846925973, "grad_norm": 2.0945799350738525, "learning_rate": 5.695370786230208e-07, "loss": 1.458, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2560 }, { "epoch": 1.6066499372647427, "grad_norm": 1.846012830734253, "learning_rate": 5.677994199166875e-07, "loss": 1.3112, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2561 }, { "epoch": 1.6072772898368883, "grad_norm": 1.8249188661575317, "learning_rate": 5.660640764082381e-07, "loss": 1.1594, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2562 }, { "epoch": 1.607904642409034, "grad_norm": 1.952448844909668, "learning_rate": 5.643310501769924e-07, "loss": 1.3515, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2563 }, { "epoch": 1.6085319949811794, "grad_norm": 2.193516254425049, "learning_rate": 5.626003432994934e-07, "loss": 1.2284, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2564 }, { "epoch": 1.609159347553325, "grad_norm": 1.8074815273284912, "learning_rate": 5.608719578495059e-07, "loss": 1.1216, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2565 }, { "epoch": 1.6097867001254706, "grad_norm": 1.4073917865753174, "learning_rate": 5.591458958980123e-07, "loss": 1.1088, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2566 }, { "epoch": 1.610414052697616, "grad_norm": 1.7987900972366333, "learning_rate": 5.574221595132124e-07, "loss": 1.2133, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2567 }, { "epoch": 1.6110414052697615, "grad_norm": 1.6239173412322998, "learning_rate": 5.557007507605172e-07, "loss": 1.0926, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2568 }, { "epoch": 1.611668757841907, "grad_norm": 1.8376796245574951, "learning_rate": 5.539816717025515e-07, "loss": 1.2056, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2569 }, { "epoch": 1.6122961104140527, "grad_norm": 1.8480087518692017, "learning_rate": 5.522649243991463e-07, "loss": 1.251, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2570 }, { "epoch": 1.6129234629861982, "grad_norm": 1.8004862070083618, "learning_rate": 5.50550510907339e-07, "loss": 1.1975, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2571 }, { "epoch": 1.6135508155583438, "grad_norm": 1.6661462783813477, "learning_rate": 5.488384332813718e-07, "loss": 1.4225, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2572 }, { "epoch": 1.6141781681304894, "grad_norm": 1.7336554527282715, "learning_rate": 5.471286935726866e-07, "loss": 1.1692, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2573 }, { "epoch": 1.6148055207026348, "grad_norm": 1.893118977546692, "learning_rate": 5.454212938299256e-07, "loss": 1.1888, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2574 }, { "epoch": 1.6154328732747805, "grad_norm": 1.7622027397155762, "learning_rate": 5.437162360989251e-07, "loss": 1.298, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2575 }, { "epoch": 1.616060225846926, "grad_norm": 1.1501035690307617, "learning_rate": 5.420135224227174e-07, "loss": 1.2508, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2576 }, { "epoch": 1.6166875784190715, "grad_norm": 1.8274246454238892, "learning_rate": 5.403131548415244e-07, "loss": 1.2622, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2577 }, { "epoch": 1.617314930991217, "grad_norm": 1.746263027191162, "learning_rate": 5.386151353927569e-07, "loss": 1.3823, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2578 }, { "epoch": 1.6179422835633626, "grad_norm": 1.7620964050292969, "learning_rate": 5.369194661110138e-07, "loss": 1.2652, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2579 }, { "epoch": 1.6185696361355082, "grad_norm": 1.7409865856170654, "learning_rate": 5.352261490280767e-07, "loss": 1.1592, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2580 }, { "epoch": 1.6191969887076536, "grad_norm": 1.9257817268371582, "learning_rate": 5.335351861729082e-07, "loss": 1.2101, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2581 }, { "epoch": 1.6198243412797992, "grad_norm": 2.0328640937805176, "learning_rate": 5.318465795716507e-07, "loss": 1.2135, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2582 }, { "epoch": 1.6204516938519449, "grad_norm": 1.7458486557006836, "learning_rate": 5.301603312476242e-07, "loss": 1.1808, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2583 }, { "epoch": 1.6210790464240903, "grad_norm": 1.7722233533859253, "learning_rate": 5.284764432213222e-07, "loss": 1.2224, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2584 }, { "epoch": 1.621706398996236, "grad_norm": 1.5155515670776367, "learning_rate": 5.267949175104084e-07, "loss": 1.2597, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2585 }, { "epoch": 1.6223337515683816, "grad_norm": 1.7921104431152344, "learning_rate": 5.251157561297193e-07, "loss": 1.1613, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2586 }, { "epoch": 1.622961104140527, "grad_norm": 1.710188388824463, "learning_rate": 5.234389610912552e-07, "loss": 1.2128, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2587 }, { "epoch": 1.6235884567126724, "grad_norm": 1.8385692834854126, "learning_rate": 5.217645344041838e-07, "loss": 1.3291, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2588 }, { "epoch": 1.624215809284818, "grad_norm": 2.3026092052459717, "learning_rate": 5.200924780748323e-07, "loss": 1.1366, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2589 }, { "epoch": 1.6248431618569636, "grad_norm": 1.6695224046707153, "learning_rate": 5.184227941066902e-07, "loss": 1.198, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2590 }, { "epoch": 1.625470514429109, "grad_norm": 1.778435230255127, "learning_rate": 5.167554845004028e-07, "loss": 1.2428, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2591 }, { "epoch": 1.6260978670012547, "grad_norm": 1.6701916456222534, "learning_rate": 5.150905512537699e-07, "loss": 1.1933, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2592 }, { "epoch": 1.6267252195734003, "grad_norm": 1.9535346031188965, "learning_rate": 5.134279963617461e-07, "loss": 1.2006, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2593 }, { "epoch": 1.6273525721455457, "grad_norm": 1.6672227382659912, "learning_rate": 5.117678218164337e-07, "loss": 1.2237, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2594 }, { "epoch": 1.6279799247176914, "grad_norm": 1.7902626991271973, "learning_rate": 5.101100296070852e-07, "loss": 1.3211, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2595 }, { "epoch": 1.628607277289837, "grad_norm": 1.9515424966812134, "learning_rate": 5.084546217200972e-07, "loss": 1.2774, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2596 }, { "epoch": 1.6292346298619824, "grad_norm": 1.8537187576293945, "learning_rate": 5.068016001390088e-07, "loss": 1.2868, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2597 }, { "epoch": 1.6298619824341278, "grad_norm": 1.8684711456298828, "learning_rate": 5.051509668445012e-07, "loss": 1.1947, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2598 }, { "epoch": 1.6304893350062737, "grad_norm": 1.3285757303237915, "learning_rate": 5.035027238143925e-07, "loss": 1.2076, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2599 }, { "epoch": 1.631116687578419, "grad_norm": 1.7497682571411133, "learning_rate": 5.018568730236384e-07, "loss": 1.1428, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2600 }, { "epoch": 1.6317440401505645, "grad_norm": 2.2294180393218994, "learning_rate": 5.002134164443262e-07, "loss": 1.3661, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2601 }, { "epoch": 1.6323713927227101, "grad_norm": 1.6695797443389893, "learning_rate": 4.985723560456765e-07, "loss": 1.3749, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2602 }, { "epoch": 1.6329987452948558, "grad_norm": 1.7357219457626343, "learning_rate": 4.969336937940378e-07, "loss": 1.3069, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2603 }, { "epoch": 1.6336260978670012, "grad_norm": 4.611532211303711, "learning_rate": 4.952974316528833e-07, "loss": 1.3523, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2604 }, { "epoch": 1.6342534504391468, "grad_norm": 1.919092059135437, "learning_rate": 4.936635715828145e-07, "loss": 1.1854, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2605 }, { "epoch": 1.6348808030112925, "grad_norm": 1.867214560508728, "learning_rate": 4.9203211554155e-07, "loss": 1.262, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2606 }, { "epoch": 1.6355081555834379, "grad_norm": 1.6036155223846436, "learning_rate": 4.904030654839317e-07, "loss": 1.2831, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2607 }, { "epoch": 1.6361355081555833, "grad_norm": 1.7611279487609863, "learning_rate": 4.887764233619163e-07, "loss": 1.2201, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2608 }, { "epoch": 1.6367628607277291, "grad_norm": 1.7776987552642822, "learning_rate": 4.871521911245764e-07, "loss": 1.3487, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2609 }, { "epoch": 1.6373902132998746, "grad_norm": 1.8058147430419922, "learning_rate": 4.855303707180964e-07, "loss": 1.2534, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2610 }, { "epoch": 1.63801756587202, "grad_norm": 1.6661456823349, "learning_rate": 4.839109640857706e-07, "loss": 1.1765, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2611 }, { "epoch": 1.6386449184441656, "grad_norm": 1.8750739097595215, "learning_rate": 4.822939731680021e-07, "loss": 1.2644, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2612 }, { "epoch": 1.6392722710163112, "grad_norm": 1.8761341571807861, "learning_rate": 4.806793999022988e-07, "loss": 1.2247, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2613 }, { "epoch": 1.6398996235884566, "grad_norm": 1.7048670053482056, "learning_rate": 4.790672462232715e-07, "loss": 1.1726, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2614 }, { "epoch": 1.6405269761606023, "grad_norm": 1.754637360572815, "learning_rate": 4.774575140626317e-07, "loss": 1.1735, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2615 }, { "epoch": 1.641154328732748, "grad_norm": 1.8188670873641968, "learning_rate": 4.758502053491906e-07, "loss": 1.2341, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2616 }, { "epoch": 1.6417816813048933, "grad_norm": 1.8739845752716064, "learning_rate": 4.7424532200885417e-07, "loss": 1.0668, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2617 }, { "epoch": 1.6424090338770387, "grad_norm": 1.9493358135223389, "learning_rate": 4.726428659646226e-07, "loss": 1.3107, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2618 }, { "epoch": 1.6430363864491846, "grad_norm": 1.6212058067321777, "learning_rate": 4.710428391365887e-07, "loss": 1.2477, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2619 }, { "epoch": 1.64366373902133, "grad_norm": 1.7489856481552124, "learning_rate": 4.6944524344193247e-07, "loss": 1.1277, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2620 }, { "epoch": 1.6442910915934754, "grad_norm": 1.7265174388885498, "learning_rate": 4.6785008079492377e-07, "loss": 1.3101, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2621 }, { "epoch": 1.644918444165621, "grad_norm": 1.923108696937561, "learning_rate": 4.6625735310691396e-07, "loss": 1.2839, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2622 }, { "epoch": 1.6455457967377667, "grad_norm": 1.7697632312774658, "learning_rate": 4.6466706228633973e-07, "loss": 1.2111, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2623 }, { "epoch": 1.646173149309912, "grad_norm": 1.936873197555542, "learning_rate": 4.630792102387155e-07, "loss": 1.2291, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2624 }, { "epoch": 1.6468005018820577, "grad_norm": 1.563767433166504, "learning_rate": 4.614937988666343e-07, "loss": 1.22, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2625 }, { "epoch": 1.6474278544542034, "grad_norm": 1.9267933368682861, "learning_rate": 4.5991083006976607e-07, "loss": 1.296, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2626 }, { "epoch": 1.6480552070263488, "grad_norm": 1.8290936946868896, "learning_rate": 4.5833030574485097e-07, "loss": 1.1191, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2627 }, { "epoch": 1.6486825595984942, "grad_norm": 1.7071551084518433, "learning_rate": 4.567522277857042e-07, "loss": 1.2689, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2628 }, { "epoch": 1.64930991217064, "grad_norm": 1.4760491847991943, "learning_rate": 4.55176598083206e-07, "loss": 1.1789, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2629 }, { "epoch": 1.6499372647427855, "grad_norm": 1.6776413917541504, "learning_rate": 4.536034185253052e-07, "loss": 1.2469, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2630 }, { "epoch": 1.6505646173149309, "grad_norm": 1.4032368659973145, "learning_rate": 4.5203269099701296e-07, "loss": 1.1217, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2631 }, { "epoch": 1.6511919698870765, "grad_norm": 1.7307270765304565, "learning_rate": 4.504644173804054e-07, "loss": 1.2958, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2632 }, { "epoch": 1.6518193224592221, "grad_norm": 1.848423719406128, "learning_rate": 4.488985995546152e-07, "loss": 1.2684, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2633 }, { "epoch": 1.6524466750313676, "grad_norm": 1.5907129049301147, "learning_rate": 4.473352393958338e-07, "loss": 1.1161, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2634 }, { "epoch": 1.6530740276035132, "grad_norm": 1.4280104637145996, "learning_rate": 4.4577433877730865e-07, "loss": 1.0568, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2635 }, { "epoch": 1.6537013801756588, "grad_norm": 1.9540001153945923, "learning_rate": 4.4421589956933827e-07, "loss": 1.3103, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2636 }, { "epoch": 1.6543287327478042, "grad_norm": 1.7712022066116333, "learning_rate": 4.4265992363927367e-07, "loss": 1.261, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2637 }, { "epoch": 1.6549560853199499, "grad_norm": 2.0369656085968018, "learning_rate": 4.411064128515133e-07, "loss": 1.3026, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2638 }, { "epoch": 1.6555834378920955, "grad_norm": 1.9376763105392456, "learning_rate": 4.395553690675014e-07, "loss": 1.4054, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2639 }, { "epoch": 1.656210790464241, "grad_norm": 1.9523491859436035, "learning_rate": 4.380067941457278e-07, "loss": 1.155, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2640 }, { "epoch": 1.6568381430363863, "grad_norm": 1.5371203422546387, "learning_rate": 4.3646068994172233e-07, "loss": 1.1868, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2641 }, { "epoch": 1.657465495608532, "grad_norm": 1.8343383073806763, "learning_rate": 4.349170583080559e-07, "loss": 1.2096, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2642 }, { "epoch": 1.6580928481806776, "grad_norm": 1.9491466283798218, "learning_rate": 4.3337590109433505e-07, "loss": 1.1406, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2643 }, { "epoch": 1.658720200752823, "grad_norm": 1.6798421144485474, "learning_rate": 4.3183722014720375e-07, "loss": 1.254, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2644 }, { "epoch": 1.6593475533249686, "grad_norm": 1.7423664331436157, "learning_rate": 4.303010173103367e-07, "loss": 1.0916, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2645 }, { "epoch": 1.6599749058971143, "grad_norm": 1.8117595911026, "learning_rate": 4.287672944244403e-07, "loss": 1.2391, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2646 }, { "epoch": 1.6606022584692597, "grad_norm": 1.9419714212417603, "learning_rate": 4.272360533272482e-07, "loss": 1.2536, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2647 }, { "epoch": 1.6612296110414053, "grad_norm": 1.3489598035812378, "learning_rate": 4.25707295853523e-07, "loss": 1.2114, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2648 }, { "epoch": 1.661856963613551, "grad_norm": 1.895057201385498, "learning_rate": 4.2418102383504884e-07, "loss": 1.3033, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2649 }, { "epoch": 1.6624843161856964, "grad_norm": 1.6380215883255005, "learning_rate": 4.22657239100632e-07, "loss": 1.0711, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2650 }, { "epoch": 1.6631116687578418, "grad_norm": 1.456254243850708, "learning_rate": 4.211359434761003e-07, "loss": 1.1179, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2651 }, { "epoch": 1.6637390213299874, "grad_norm": 1.8337856531143188, "learning_rate": 4.196171387842976e-07, "loss": 1.2768, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2652 }, { "epoch": 1.664366373902133, "grad_norm": 1.382093906402588, "learning_rate": 4.18100826845082e-07, "loss": 1.1812, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2653 }, { "epoch": 1.6649937264742785, "grad_norm": 1.6331734657287598, "learning_rate": 4.16587009475328e-07, "loss": 1.1856, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2654 }, { "epoch": 1.665621079046424, "grad_norm": 2.566094160079956, "learning_rate": 4.1507568848891706e-07, "loss": 1.1711, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2655 }, { "epoch": 1.6662484316185697, "grad_norm": 1.8740427494049072, "learning_rate": 4.1356686569674344e-07, "loss": 1.2985, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2656 }, { "epoch": 1.6668757841907151, "grad_norm": 1.7602894306182861, "learning_rate": 4.1206054290670537e-07, "loss": 1.2358, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2657 }, { "epoch": 1.6675031367628608, "grad_norm": 1.8847901821136475, "learning_rate": 4.10556721923705e-07, "loss": 1.3158, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2658 }, { "epoch": 1.6681304893350064, "grad_norm": 1.524640440940857, "learning_rate": 4.0905540454965004e-07, "loss": 1.1987, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2659 }, { "epoch": 1.6687578419071518, "grad_norm": 1.7790837287902832, "learning_rate": 4.0755659258344434e-07, "loss": 1.1568, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2660 }, { "epoch": 1.6693851944792972, "grad_norm": 1.7558435201644897, "learning_rate": 4.060602878209932e-07, "loss": 1.1112, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2661 }, { "epoch": 1.6700125470514429, "grad_norm": 1.9152045249938965, "learning_rate": 4.045664920551959e-07, "loss": 1.2686, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2662 }, { "epoch": 1.6706398996235885, "grad_norm": 1.7652149200439453, "learning_rate": 4.0307520707594446e-07, "loss": 1.3869, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2663 }, { "epoch": 1.671267252195734, "grad_norm": 1.9139306545257568, "learning_rate": 4.015864346701251e-07, "loss": 1.3363, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2664 }, { "epoch": 1.6718946047678795, "grad_norm": 2.0330417156219482, "learning_rate": 4.001001766216117e-07, "loss": 1.2961, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2665 }, { "epoch": 1.6725219573400252, "grad_norm": 2.176459789276123, "learning_rate": 3.986164347112656e-07, "loss": 1.3539, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2666 }, { "epoch": 1.6731493099121706, "grad_norm": 1.9700795412063599, "learning_rate": 3.971352107169324e-07, "loss": 1.2131, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2667 }, { "epoch": 1.6737766624843162, "grad_norm": 1.8541291952133179, "learning_rate": 3.956565064134432e-07, "loss": 1.2052, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2668 }, { "epoch": 1.6744040150564619, "grad_norm": 1.6921569108963013, "learning_rate": 3.941803235726069e-07, "loss": 1.0926, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2669 }, { "epoch": 1.6750313676286073, "grad_norm": 2.4293434619903564, "learning_rate": 3.9270666396321404e-07, "loss": 1.2696, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2670 }, { "epoch": 1.6756587202007527, "grad_norm": 1.8745760917663574, "learning_rate": 3.9123552935102976e-07, "loss": 1.2597, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2671 }, { "epoch": 1.6762860727728983, "grad_norm": 1.8291773796081543, "learning_rate": 3.897669214987937e-07, "loss": 1.4168, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2672 }, { "epoch": 1.676913425345044, "grad_norm": 1.7694315910339355, "learning_rate": 3.883008421662196e-07, "loss": 1.2302, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2673 }, { "epoch": 1.6775407779171894, "grad_norm": 1.8569852113723755, "learning_rate": 3.8683729310998926e-07, "loss": 1.1905, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2674 }, { "epoch": 1.678168130489335, "grad_norm": 1.8917303085327148, "learning_rate": 3.8537627608375485e-07, "loss": 1.2325, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2675 }, { "epoch": 1.6787954830614806, "grad_norm": 1.6169365644454956, "learning_rate": 3.839177928381327e-07, "loss": 1.4392, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2676 }, { "epoch": 1.679422835633626, "grad_norm": 1.8867363929748535, "learning_rate": 3.8246184512070486e-07, "loss": 1.1633, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2677 }, { "epoch": 1.6800501882057717, "grad_norm": 2.2046451568603516, "learning_rate": 3.81008434676014e-07, "loss": 1.1018, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2678 }, { "epoch": 1.6806775407779173, "grad_norm": 1.8099101781845093, "learning_rate": 3.79557563245562e-07, "loss": 1.3516, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2679 }, { "epoch": 1.6813048933500627, "grad_norm": 1.7646654844284058, "learning_rate": 3.78109232567811e-07, "loss": 1.2145, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2680 }, { "epoch": 1.6819322459222081, "grad_norm": 1.616289734840393, "learning_rate": 3.766634443781761e-07, "loss": 1.2265, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2681 }, { "epoch": 1.682559598494354, "grad_norm": 1.3728324174880981, "learning_rate": 3.752202004090275e-07, "loss": 1.2257, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2682 }, { "epoch": 1.6831869510664994, "grad_norm": 1.857109785079956, "learning_rate": 3.7377950238968566e-07, "loss": 1.2864, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2683 }, { "epoch": 1.6838143036386448, "grad_norm": 1.9126770496368408, "learning_rate": 3.7234135204642195e-07, "loss": 1.2166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2684 }, { "epoch": 1.6844416562107905, "grad_norm": 2.712088108062744, "learning_rate": 3.709057511024541e-07, "loss": 1.2432, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2685 }, { "epoch": 1.685069008782936, "grad_norm": 1.7454075813293457, "learning_rate": 3.6947270127794475e-07, "loss": 1.2589, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2686 }, { "epoch": 1.6856963613550815, "grad_norm": 1.8178859949111938, "learning_rate": 3.6804220429000116e-07, "loss": 1.2416, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2687 }, { "epoch": 1.6863237139272271, "grad_norm": 1.7671403884887695, "learning_rate": 3.666142618526697e-07, "loss": 1.4289, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2688 }, { "epoch": 1.6869510664993728, "grad_norm": 1.6030900478363037, "learning_rate": 3.651888756769381e-07, "loss": 1.3057, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2689 }, { "epoch": 1.6875784190715182, "grad_norm": 1.710805058479309, "learning_rate": 3.637660474707291e-07, "loss": 1.2315, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2690 }, { "epoch": 1.6882057716436636, "grad_norm": 1.9641602039337158, "learning_rate": 3.6234577893890235e-07, "loss": 1.1884, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2691 }, { "epoch": 1.6888331242158094, "grad_norm": 1.7865450382232666, "learning_rate": 3.609280717832489e-07, "loss": 1.2465, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2692 }, { "epoch": 1.6894604767879549, "grad_norm": 1.943463683128357, "learning_rate": 3.595129277024906e-07, "loss": 1.2143, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2693 }, { "epoch": 1.6900878293601003, "grad_norm": 1.588843822479248, "learning_rate": 3.581003483922801e-07, "loss": 1.2295, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2694 }, { "epoch": 1.690715181932246, "grad_norm": 1.748705506324768, "learning_rate": 3.5669033554519424e-07, "loss": 1.2691, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2695 }, { "epoch": 1.6913425345043915, "grad_norm": 1.7085033655166626, "learning_rate": 3.552828908507375e-07, "loss": 1.2281, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2696 }, { "epoch": 1.691969887076537, "grad_norm": 1.6979553699493408, "learning_rate": 3.538780159953348e-07, "loss": 1.2615, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2697 }, { "epoch": 1.6925972396486826, "grad_norm": 1.8602803945541382, "learning_rate": 3.5247571266233323e-07, "loss": 1.3764, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2698 }, { "epoch": 1.6932245922208282, "grad_norm": 2.2552378177642822, "learning_rate": 3.510759825319976e-07, "loss": 1.2772, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2699 }, { "epoch": 1.6938519447929736, "grad_norm": 1.6664947271347046, "learning_rate": 3.4967882728150994e-07, "loss": 1.1499, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2700 }, { "epoch": 1.694479297365119, "grad_norm": 1.85127592086792, "learning_rate": 3.48284248584968e-07, "loss": 1.2625, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2701 }, { "epoch": 1.695106649937265, "grad_norm": 1.7261724472045898, "learning_rate": 3.468922481133802e-07, "loss": 1.2575, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2702 }, { "epoch": 1.6957340025094103, "grad_norm": 1.5232841968536377, "learning_rate": 3.455028275346678e-07, "loss": 1.2934, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2703 }, { "epoch": 1.6963613550815557, "grad_norm": 1.9840103387832642, "learning_rate": 3.4411598851365967e-07, "loss": 1.3476, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2704 }, { "epoch": 1.6969887076537014, "grad_norm": 1.9216537475585938, "learning_rate": 3.4273173271209047e-07, "loss": 1.1801, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2705 }, { "epoch": 1.697616060225847, "grad_norm": 1.321739912033081, "learning_rate": 3.413500617886023e-07, "loss": 1.2259, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2706 }, { "epoch": 1.6982434127979924, "grad_norm": 1.701654314994812, "learning_rate": 3.39970977398737e-07, "loss": 1.2265, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2707 }, { "epoch": 1.698870765370138, "grad_norm": 1.864776611328125, "learning_rate": 3.3859448119493957e-07, "loss": 1.4043, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2708 }, { "epoch": 1.6994981179422837, "grad_norm": 1.7704063653945923, "learning_rate": 3.3722057482655224e-07, "loss": 1.2881, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2709 }, { "epoch": 1.700125470514429, "grad_norm": 1.7865586280822754, "learning_rate": 3.3584925993981497e-07, "loss": 1.2118, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2710 }, { "epoch": 1.7007528230865745, "grad_norm": 2.1016578674316406, "learning_rate": 3.344805381778621e-07, "loss": 1.1508, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2711 }, { "epoch": 1.7013801756587204, "grad_norm": 1.7841296195983887, "learning_rate": 3.331144111807205e-07, "loss": 1.1504, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2712 }, { "epoch": 1.7020075282308658, "grad_norm": 1.661086916923523, "learning_rate": 3.3175088058530925e-07, "loss": 1.1409, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2713 }, { "epoch": 1.7026348808030112, "grad_norm": 1.675002098083496, "learning_rate": 3.3038994802543466e-07, "loss": 1.1987, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2714 }, { "epoch": 1.7032622333751568, "grad_norm": 1.4157485961914062, "learning_rate": 3.2903161513179156e-07, "loss": 1.177, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2715 }, { "epoch": 1.7038895859473024, "grad_norm": 1.4867208003997803, "learning_rate": 3.276758835319582e-07, "loss": 1.2039, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2716 }, { "epoch": 1.7045169385194479, "grad_norm": 1.752272367477417, "learning_rate": 3.2632275485039806e-07, "loss": 1.326, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2717 }, { "epoch": 1.7051442910915935, "grad_norm": 1.7673453092575073, "learning_rate": 3.2497223070845377e-07, "loss": 1.2458, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2718 }, { "epoch": 1.7057716436637391, "grad_norm": 1.7563871145248413, "learning_rate": 3.236243127243477e-07, "loss": 1.2054, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2719 }, { "epoch": 1.7063989962358845, "grad_norm": 1.822948932647705, "learning_rate": 3.2227900251318055e-07, "loss": 1.1937, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2720 }, { "epoch": 1.70702634880803, "grad_norm": 1.6502693891525269, "learning_rate": 3.2093630168692656e-07, "loss": 1.2332, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2721 }, { "epoch": 1.7076537013801758, "grad_norm": 1.5615298748016357, "learning_rate": 3.1959621185443516e-07, "loss": 1.189, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2722 }, { "epoch": 1.7082810539523212, "grad_norm": 1.8436845541000366, "learning_rate": 3.182587346214255e-07, "loss": 1.2043, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2723 }, { "epoch": 1.7089084065244666, "grad_norm": 2.5546927452087402, "learning_rate": 3.169238715904882e-07, "loss": 1.1197, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2724 }, { "epoch": 1.7095357590966123, "grad_norm": 1.7388148307800293, "learning_rate": 3.1559162436107987e-07, "loss": 1.2678, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2725 }, { "epoch": 1.710163111668758, "grad_norm": 1.6913169622421265, "learning_rate": 3.142619945295225e-07, "loss": 1.1965, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2726 }, { "epoch": 1.7107904642409033, "grad_norm": 1.6821208000183105, "learning_rate": 3.1293498368900414e-07, "loss": 1.2556, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2727 }, { "epoch": 1.711417816813049, "grad_norm": 2.5397050380706787, "learning_rate": 3.116105934295724e-07, "loss": 1.098, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2728 }, { "epoch": 1.7120451693851946, "grad_norm": 1.8930282592773438, "learning_rate": 3.1028882533813643e-07, "loss": 1.1857, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2729 }, { "epoch": 1.71267252195734, "grad_norm": 1.8789072036743164, "learning_rate": 3.089696809984624e-07, "loss": 1.311, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2730 }, { "epoch": 1.7132998745294856, "grad_norm": 2.0739526748657227, "learning_rate": 3.076531619911735e-07, "loss": 1.1883, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2731 }, { "epoch": 1.7139272271016313, "grad_norm": 1.430984616279602, "learning_rate": 3.063392698937462e-07, "loss": 1.2454, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2732 }, { "epoch": 1.7145545796737767, "grad_norm": 1.6760432720184326, "learning_rate": 3.0502800628050946e-07, "loss": 1.2448, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2733 }, { "epoch": 1.715181932245922, "grad_norm": 1.83548104763031, "learning_rate": 3.0371937272264454e-07, "loss": 1.2657, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2734 }, { "epoch": 1.7158092848180677, "grad_norm": 2.0283336639404297, "learning_rate": 3.024133707881785e-07, "loss": 1.272, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2735 }, { "epoch": 1.7164366373902133, "grad_norm": 1.5249745845794678, "learning_rate": 3.0111000204198825e-07, "loss": 1.205, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2736 }, { "epoch": 1.7170639899623588, "grad_norm": 1.977151870727539, "learning_rate": 2.998092680457923e-07, "loss": 1.286, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2737 }, { "epoch": 1.7176913425345044, "grad_norm": 1.2337921857833862, "learning_rate": 2.98511170358155e-07, "loss": 1.1496, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2738 }, { "epoch": 1.71831869510665, "grad_norm": 2.6582040786743164, "learning_rate": 2.9721571053448056e-07, "loss": 1.1556, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2739 }, { "epoch": 1.7189460476787954, "grad_norm": 1.7614023685455322, "learning_rate": 2.9592289012701113e-07, "loss": 1.1573, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2740 }, { "epoch": 1.719573400250941, "grad_norm": 1.8160935640335083, "learning_rate": 2.9463271068482955e-07, "loss": 1.1242, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2741 }, { "epoch": 1.7202007528230867, "grad_norm": 1.9184051752090454, "learning_rate": 2.933451737538506e-07, "loss": 1.2449, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2742 }, { "epoch": 1.7208281053952321, "grad_norm": 1.8110864162445068, "learning_rate": 2.920602808768258e-07, "loss": 1.2209, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2743 }, { "epoch": 1.7214554579673775, "grad_norm": 1.450143575668335, "learning_rate": 2.9077803359333607e-07, "loss": 1.1317, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2744 }, { "epoch": 1.7220828105395232, "grad_norm": 1.7388874292373657, "learning_rate": 2.8949843343979435e-07, "loss": 1.1769, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2745 }, { "epoch": 1.7227101631116688, "grad_norm": 1.4511213302612305, "learning_rate": 2.8822148194944055e-07, "loss": 1.1455, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2746 }, { "epoch": 1.7233375156838142, "grad_norm": 1.4643691778182983, "learning_rate": 2.8694718065234105e-07, "loss": 1.2187, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2747 }, { "epoch": 1.7239648682559598, "grad_norm": 1.8728381395339966, "learning_rate": 2.856755310753867e-07, "loss": 1.2591, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2748 }, { "epoch": 1.7245922208281055, "grad_norm": 1.712773084640503, "learning_rate": 2.8440653474229083e-07, "loss": 1.315, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2749 }, { "epoch": 1.725219573400251, "grad_norm": 1.9405332803726196, "learning_rate": 2.831401931735892e-07, "loss": 1.1987, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2750 }, { "epoch": 1.7258469259723965, "grad_norm": 1.7674938440322876, "learning_rate": 2.8187650788663366e-07, "loss": 1.3141, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2751 }, { "epoch": 1.7264742785445422, "grad_norm": 1.6644740104675293, "learning_rate": 2.8061548039559625e-07, "loss": 1.1981, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2752 }, { "epoch": 1.7271016311166876, "grad_norm": 2.2521865367889404, "learning_rate": 2.793571122114627e-07, "loss": 1.2635, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2753 }, { "epoch": 1.727728983688833, "grad_norm": 1.648939609527588, "learning_rate": 2.781014048420319e-07, "loss": 1.1418, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2754 }, { "epoch": 1.7283563362609786, "grad_norm": 1.3256694078445435, "learning_rate": 2.7684835979191664e-07, "loss": 1.0624, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2755 }, { "epoch": 1.7289836888331243, "grad_norm": 1.8163894414901733, "learning_rate": 2.755979785625368e-07, "loss": 1.1717, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2756 }, { "epoch": 1.7296110414052697, "grad_norm": 1.7268973588943481, "learning_rate": 2.7435026265212327e-07, "loss": 1.2758, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2757 }, { "epoch": 1.7302383939774153, "grad_norm": 1.8375306129455566, "learning_rate": 2.731052135557111e-07, "loss": 1.2875, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2758 }, { "epoch": 1.730865746549561, "grad_norm": 1.4706637859344482, "learning_rate": 2.718628327651407e-07, "loss": 1.315, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2759 }, { "epoch": 1.7314930991217063, "grad_norm": 1.4504117965698242, "learning_rate": 2.7062312176905606e-07, "loss": 1.1671, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2760 }, { "epoch": 1.732120451693852, "grad_norm": 2.374910593032837, "learning_rate": 2.693860820529004e-07, "loss": 1.167, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2761 }, { "epoch": 1.7327478042659976, "grad_norm": 1.5370956659317017, "learning_rate": 2.681517150989185e-07, "loss": 1.3897, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2762 }, { "epoch": 1.733375156838143, "grad_norm": 1.623807668685913, "learning_rate": 2.669200223861504e-07, "loss": 1.0385, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2763 }, { "epoch": 1.7340025094102884, "grad_norm": 1.8441228866577148, "learning_rate": 2.6569100539043326e-07, "loss": 1.268, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2764 }, { "epoch": 1.734629861982434, "grad_norm": 1.8143815994262695, "learning_rate": 2.6446466558439657e-07, "loss": 1.2536, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2765 }, { "epoch": 1.7352572145545797, "grad_norm": 1.8156932592391968, "learning_rate": 2.632410044374642e-07, "loss": 1.2525, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2766 }, { "epoch": 1.7358845671267251, "grad_norm": 1.8268598318099976, "learning_rate": 2.6202002341584875e-07, "loss": 1.3156, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2767 }, { "epoch": 1.7365119196988708, "grad_norm": 1.4789742231369019, "learning_rate": 2.608017239825511e-07, "loss": 1.1006, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2768 }, { "epoch": 1.7371392722710164, "grad_norm": 1.5572457313537598, "learning_rate": 2.5958610759736133e-07, "loss": 1.2384, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2769 }, { "epoch": 1.7377666248431618, "grad_norm": 1.9117459058761597, "learning_rate": 2.5837317571685175e-07, "loss": 1.2199, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2770 }, { "epoch": 1.7383939774153074, "grad_norm": 1.9667168855667114, "learning_rate": 2.5716292979438037e-07, "loss": 1.2503, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2771 }, { "epoch": 1.739021329987453, "grad_norm": 2.239107370376587, "learning_rate": 2.559553712800852e-07, "loss": 1.203, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2772 }, { "epoch": 1.7396486825595985, "grad_norm": 2.7901501655578613, "learning_rate": 2.547505016208851e-07, "loss": 1.2129, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2773 }, { "epoch": 1.740276035131744, "grad_norm": 1.6809381246566772, "learning_rate": 2.53548322260477e-07, "loss": 1.218, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2774 }, { "epoch": 1.7409033877038897, "grad_norm": 1.3651955127716064, "learning_rate": 2.5234883463933323e-07, "loss": 1.1659, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2775 }, { "epoch": 1.7415307402760352, "grad_norm": 1.5471340417861938, "learning_rate": 2.511520401947032e-07, "loss": 1.2559, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2776 }, { "epoch": 1.7421580928481806, "grad_norm": 1.870573878288269, "learning_rate": 2.4995794036060616e-07, "loss": 1.2515, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2777 }, { "epoch": 1.7427854454203262, "grad_norm": 1.6339471340179443, "learning_rate": 2.4876653656783543e-07, "loss": 1.2609, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2778 }, { "epoch": 1.7434127979924718, "grad_norm": 1.815114974975586, "learning_rate": 2.4757783024395244e-07, "loss": 1.3496, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2779 }, { "epoch": 1.7440401505646173, "grad_norm": 1.5389330387115479, "learning_rate": 2.463918228132864e-07, "loss": 1.1197, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2780 }, { "epoch": 1.7446675031367629, "grad_norm": 1.1387971639633179, "learning_rate": 2.4520851569693275e-07, "loss": 1.1247, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2781 }, { "epoch": 1.7452948557089085, "grad_norm": 2.002338171005249, "learning_rate": 2.440279103127524e-07, "loss": 1.2492, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2782 }, { "epoch": 1.745922208281054, "grad_norm": 1.8052780628204346, "learning_rate": 2.428500080753676e-07, "loss": 1.2869, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2783 }, { "epoch": 1.7465495608531993, "grad_norm": 1.9134352207183838, "learning_rate": 2.416748103961625e-07, "loss": 1.13, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2784 }, { "epoch": 1.7471769134253452, "grad_norm": 1.935655951499939, "learning_rate": 2.4050231868328026e-07, "loss": 1.3445, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2785 }, { "epoch": 1.7478042659974906, "grad_norm": 1.5137767791748047, "learning_rate": 2.3933253434162193e-07, "loss": 1.0256, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2786 }, { "epoch": 1.748431618569636, "grad_norm": 1.7359564304351807, "learning_rate": 2.381654587728438e-07, "loss": 1.268, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2787 }, { "epoch": 1.7490589711417817, "grad_norm": 1.6612578630447388, "learning_rate": 2.3700109337535803e-07, "loss": 1.4344, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2788 }, { "epoch": 1.7496863237139273, "grad_norm": 1.733493685722351, "learning_rate": 2.3583943954432725e-07, "loss": 1.3073, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2789 }, { "epoch": 1.7503136762860727, "grad_norm": 1.6784566640853882, "learning_rate": 2.3468049867166747e-07, "loss": 1.1749, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2790 }, { "epoch": 1.7509410288582183, "grad_norm": 1.6308798789978027, "learning_rate": 2.335242721460415e-07, "loss": 1.2556, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2791 }, { "epoch": 1.751568381430364, "grad_norm": 1.7757388353347778, "learning_rate": 2.3237076135286224e-07, "loss": 1.3496, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2792 }, { "epoch": 1.7521957340025094, "grad_norm": 2.260532855987549, "learning_rate": 2.3121996767428655e-07, "loss": 1.1303, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2793 }, { "epoch": 1.7528230865746548, "grad_norm": 1.8579128980636597, "learning_rate": 2.300718924892159e-07, "loss": 1.3093, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2794 }, { "epoch": 1.7534504391468007, "grad_norm": 1.677127718925476, "learning_rate": 2.28926537173296e-07, "loss": 1.2947, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2795 }, { "epoch": 1.754077791718946, "grad_norm": 1.9110498428344727, "learning_rate": 2.2778390309891178e-07, "loss": 1.1955, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2796 }, { "epoch": 1.7547051442910915, "grad_norm": 1.6374671459197998, "learning_rate": 2.2664399163518786e-07, "loss": 1.2818, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2797 }, { "epoch": 1.7553324968632371, "grad_norm": 1.7491819858551025, "learning_rate": 2.255068041479877e-07, "loss": 1.2249, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2798 }, { "epoch": 1.7559598494353827, "grad_norm": 1.700737476348877, "learning_rate": 2.243723419999097e-07, "loss": 1.1558, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2799 }, { "epoch": 1.7565872020075282, "grad_norm": 1.699294090270996, "learning_rate": 2.232406065502868e-07, "loss": 1.2829, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2800 }, { "epoch": 1.7572145545796738, "grad_norm": 1.671245813369751, "learning_rate": 2.2211159915518477e-07, "loss": 1.1871, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2801 }, { "epoch": 1.7578419071518194, "grad_norm": 1.7521545886993408, "learning_rate": 2.2098532116740152e-07, "loss": 1.3888, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2802 }, { "epoch": 1.7584692597239648, "grad_norm": 1.4503552913665771, "learning_rate": 2.1986177393646307e-07, "loss": 1.1318, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2803 }, { "epoch": 1.7590966122961103, "grad_norm": 2.4087252616882324, "learning_rate": 2.1874095880862505e-07, "loss": 1.3339, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2804 }, { "epoch": 1.759723964868256, "grad_norm": 1.7991889715194702, "learning_rate": 2.176228771268682e-07, "loss": 1.1475, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2805 }, { "epoch": 1.7603513174404015, "grad_norm": 1.8192427158355713, "learning_rate": 2.165075302308975e-07, "loss": 1.1967, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2806 }, { "epoch": 1.760978670012547, "grad_norm": 1.9129115343093872, "learning_rate": 2.1539491945714337e-07, "loss": 1.264, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2807 }, { "epoch": 1.7616060225846926, "grad_norm": 1.306735634803772, "learning_rate": 2.1428504613875466e-07, "loss": 1.0545, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2808 }, { "epoch": 1.7622333751568382, "grad_norm": 1.6830686330795288, "learning_rate": 2.131779116056032e-07, "loss": 1.3171, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2809 }, { "epoch": 1.7628607277289836, "grad_norm": 1.8796741962432861, "learning_rate": 2.1207351718427667e-07, "loss": 1.1889, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2810 }, { "epoch": 1.7634880803011292, "grad_norm": 1.8853201866149902, "learning_rate": 2.1097186419808151e-07, "loss": 1.1654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2811 }, { "epoch": 1.7641154328732749, "grad_norm": 1.8712329864501953, "learning_rate": 2.0987295396703772e-07, "loss": 1.2522, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2812 }, { "epoch": 1.7647427854454203, "grad_norm": 1.628964900970459, "learning_rate": 2.0877678780787918e-07, "loss": 1.1069, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2813 }, { "epoch": 1.765370138017566, "grad_norm": 1.801734447479248, "learning_rate": 2.0768336703405335e-07, "loss": 1.1379, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2814 }, { "epoch": 1.7659974905897116, "grad_norm": 1.8092901706695557, "learning_rate": 2.0659269295571603e-07, "loss": 1.1901, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2815 }, { "epoch": 1.766624843161857, "grad_norm": 3.3052401542663574, "learning_rate": 2.0550476687973274e-07, "loss": 1.1334, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2816 }, { "epoch": 1.7672521957340024, "grad_norm": 1.5364779233932495, "learning_rate": 2.04419590109676e-07, "loss": 1.2076, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2817 }, { "epoch": 1.767879548306148, "grad_norm": 1.5080270767211914, "learning_rate": 2.0333716394582536e-07, "loss": 1.1618, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2818 }, { "epoch": 1.7685069008782937, "grad_norm": 1.584987759590149, "learning_rate": 2.0225748968516284e-07, "loss": 1.1514, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2819 }, { "epoch": 1.769134253450439, "grad_norm": 1.841313362121582, "learning_rate": 2.0118056862137358e-07, "loss": 1.1889, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2820 }, { "epoch": 1.7697616060225847, "grad_norm": 1.4839106798171997, "learning_rate": 2.0010640204484455e-07, "loss": 1.124, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2821 }, { "epoch": 1.7703889585947303, "grad_norm": 2.0453951358795166, "learning_rate": 1.9903499124266118e-07, "loss": 1.2448, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2822 }, { "epoch": 1.7710163111668757, "grad_norm": 1.7970421314239502, "learning_rate": 1.9796633749860795e-07, "loss": 1.4263, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2823 }, { "epoch": 1.7716436637390214, "grad_norm": 1.8288103342056274, "learning_rate": 1.9690044209316445e-07, "loss": 1.1157, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2824 }, { "epoch": 1.772271016311167, "grad_norm": 1.8434520959854126, "learning_rate": 1.958373063035071e-07, "loss": 1.3835, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2825 }, { "epoch": 1.7728983688833124, "grad_norm": 1.594417691230774, "learning_rate": 1.947769314035036e-07, "loss": 1.1717, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2826 }, { "epoch": 1.7735257214554578, "grad_norm": 1.8379368782043457, "learning_rate": 1.937193186637143e-07, "loss": 1.2795, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2827 }, { "epoch": 1.7741530740276035, "grad_norm": 1.6096771955490112, "learning_rate": 1.926644693513907e-07, "loss": 1.2423, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2828 }, { "epoch": 1.774780426599749, "grad_norm": 1.4958261251449585, "learning_rate": 1.916123847304721e-07, "loss": 1.2482, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2829 }, { "epoch": 1.7754077791718945, "grad_norm": 2.062771797180176, "learning_rate": 1.9056306606158563e-07, "loss": 1.2613, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2830 }, { "epoch": 1.7760351317440402, "grad_norm": 1.9094319343566895, "learning_rate": 1.895165146020439e-07, "loss": 1.2073, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2831 }, { "epoch": 1.7766624843161858, "grad_norm": 1.3328020572662354, "learning_rate": 1.8847273160584378e-07, "loss": 1.1798, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2832 }, { "epoch": 1.7772898368883312, "grad_norm": 1.8016719818115234, "learning_rate": 1.8743171832366512e-07, "loss": 1.106, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2833 }, { "epoch": 1.7779171894604768, "grad_norm": 1.3392291069030762, "learning_rate": 1.8639347600286877e-07, "loss": 1.2876, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2834 }, { "epoch": 1.7785445420326225, "grad_norm": 1.6724977493286133, "learning_rate": 1.8535800588749598e-07, "loss": 1.1286, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2835 }, { "epoch": 1.7791718946047679, "grad_norm": 1.430114984512329, "learning_rate": 1.8432530921826537e-07, "loss": 1.1618, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2836 }, { "epoch": 1.7797992471769133, "grad_norm": 1.1978938579559326, "learning_rate": 1.8329538723257352e-07, "loss": 1.209, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2837 }, { "epoch": 1.780426599749059, "grad_norm": 1.695947527885437, "learning_rate": 1.8226824116449076e-07, "loss": 1.3051, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2838 }, { "epoch": 1.7810539523212046, "grad_norm": 1.7131558656692505, "learning_rate": 1.8124387224476347e-07, "loss": 1.2666, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2839 }, { "epoch": 1.78168130489335, "grad_norm": 1.789499044418335, "learning_rate": 1.802222817008084e-07, "loss": 1.1785, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2840 }, { "epoch": 1.7823086574654956, "grad_norm": 1.9134174585342407, "learning_rate": 1.7920347075671335e-07, "loss": 1.3004, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2841 }, { "epoch": 1.7829360100376412, "grad_norm": 1.8754029273986816, "learning_rate": 1.7818744063323735e-07, "loss": 1.2351, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2842 }, { "epoch": 1.7835633626097867, "grad_norm": 1.7078591585159302, "learning_rate": 1.7717419254780488e-07, "loss": 1.0923, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2843 }, { "epoch": 1.7841907151819323, "grad_norm": 1.804856538772583, "learning_rate": 1.761637277145095e-07, "loss": 1.0874, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2844 }, { "epoch": 1.784818067754078, "grad_norm": 1.6386200189590454, "learning_rate": 1.7515604734410745e-07, "loss": 1.195, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2845 }, { "epoch": 1.7854454203262233, "grad_norm": 1.5458471775054932, "learning_rate": 1.7415115264402065e-07, "loss": 1.2134, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2846 }, { "epoch": 1.7860727728983687, "grad_norm": 1.5821287631988525, "learning_rate": 1.7314904481833178e-07, "loss": 1.2658, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2847 }, { "epoch": 1.7867001254705144, "grad_norm": 1.736161231994629, "learning_rate": 1.7214972506778476e-07, "loss": 1.1598, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2848 }, { "epoch": 1.78732747804266, "grad_norm": 1.7591357231140137, "learning_rate": 1.7115319458978236e-07, "loss": 1.2577, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2849 }, { "epoch": 1.7879548306148054, "grad_norm": 1.8174993991851807, "learning_rate": 1.7015945457838524e-07, "loss": 1.1333, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2850 }, { "epoch": 1.788582183186951, "grad_norm": 1.7169201374053955, "learning_rate": 1.6916850622431175e-07, "loss": 1.3502, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2851 }, { "epoch": 1.7892095357590967, "grad_norm": 2.1507155895233154, "learning_rate": 1.681803507149335e-07, "loss": 1.2511, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2852 }, { "epoch": 1.789836888331242, "grad_norm": 1.5810800790786743, "learning_rate": 1.6719498923427697e-07, "loss": 1.1941, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2853 }, { "epoch": 1.7904642409033877, "grad_norm": 1.6978322267532349, "learning_rate": 1.6621242296301966e-07, "loss": 1.2615, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2854 }, { "epoch": 1.7910915934755334, "grad_norm": 1.8557056188583374, "learning_rate": 1.6523265307849035e-07, "loss": 1.1719, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2855 }, { "epoch": 1.7917189460476788, "grad_norm": 1.9752377271652222, "learning_rate": 1.6425568075466775e-07, "loss": 1.2727, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2856 }, { "epoch": 1.7923462986198242, "grad_norm": 1.8385305404663086, "learning_rate": 1.6328150716217682e-07, "loss": 1.1641, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2857 }, { "epoch": 1.79297365119197, "grad_norm": 4.611957550048828, "learning_rate": 1.6231013346829138e-07, "loss": 1.258, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2858 }, { "epoch": 1.7936010037641155, "grad_norm": 1.7183728218078613, "learning_rate": 1.6134156083692843e-07, "loss": 1.2715, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2859 }, { "epoch": 1.7942283563362609, "grad_norm": 1.6445415019989014, "learning_rate": 1.6037579042864876e-07, "loss": 1.1439, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2860 }, { "epoch": 1.7948557089084065, "grad_norm": 1.642172932624817, "learning_rate": 1.59412823400657e-07, "loss": 1.1275, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2861 }, { "epoch": 1.7954830614805521, "grad_norm": 1.7930870056152344, "learning_rate": 1.584526609067971e-07, "loss": 1.1603, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2862 }, { "epoch": 1.7961104140526976, "grad_norm": 1.725569248199463, "learning_rate": 1.574953040975538e-07, "loss": 1.1794, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2863 }, { "epoch": 1.7967377666248432, "grad_norm": 1.5991744995117188, "learning_rate": 1.5654075412004893e-07, "loss": 1.3509, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2864 }, { "epoch": 1.7973651191969888, "grad_norm": 1.9339492321014404, "learning_rate": 1.55589012118042e-07, "loss": 1.18, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2865 }, { "epoch": 1.7979924717691342, "grad_norm": 1.4420740604400635, "learning_rate": 1.5464007923192719e-07, "loss": 1.2373, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2866 }, { "epoch": 1.7986198243412796, "grad_norm": 1.912462592124939, "learning_rate": 1.5369395659873305e-07, "loss": 1.3134, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2867 }, { "epoch": 1.7992471769134255, "grad_norm": 1.7321563959121704, "learning_rate": 1.5275064535212186e-07, "loss": 1.1722, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2868 }, { "epoch": 1.799874529485571, "grad_norm": 1.7911001443862915, "learning_rate": 1.5181014662238507e-07, "loss": 1.1812, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2869 }, { "epoch": 1.8005018820577163, "grad_norm": 1.928369164466858, "learning_rate": 1.5087246153644651e-07, "loss": 1.3139, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2870 }, { "epoch": 1.801129234629862, "grad_norm": 1.7555097341537476, "learning_rate": 1.4993759121785635e-07, "loss": 1.1508, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2871 }, { "epoch": 1.8017565872020076, "grad_norm": 1.8406260013580322, "learning_rate": 1.4900553678679457e-07, "loss": 1.1167, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2872 }, { "epoch": 1.802383939774153, "grad_norm": 1.290350317955017, "learning_rate": 1.4807629936006485e-07, "loss": 1.13, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2873 }, { "epoch": 1.8030112923462986, "grad_norm": 1.650330662727356, "learning_rate": 1.471498800510962e-07, "loss": 1.2608, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2874 }, { "epoch": 1.8036386449184443, "grad_norm": 1.4686672687530518, "learning_rate": 1.4622627996994154e-07, "loss": 1.1727, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2875 }, { "epoch": 1.8042659974905897, "grad_norm": 2.0812888145446777, "learning_rate": 1.4530550022327532e-07, "loss": 1.1651, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2876 }, { "epoch": 1.804893350062735, "grad_norm": 1.7088572978973389, "learning_rate": 1.4438754191439254e-07, "loss": 1.2074, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2877 }, { "epoch": 1.805520702634881, "grad_norm": 1.4053688049316406, "learning_rate": 1.4347240614320717e-07, "loss": 1.2422, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2878 }, { "epoch": 1.8061480552070264, "grad_norm": 1.803846836090088, "learning_rate": 1.4256009400625216e-07, "loss": 1.3032, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2879 }, { "epoch": 1.8067754077791718, "grad_norm": 2.0060648918151855, "learning_rate": 1.4165060659667606e-07, "loss": 1.231, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2880 }, { "epoch": 1.8074027603513174, "grad_norm": 1.4810571670532227, "learning_rate": 1.407439450042433e-07, "loss": 1.1486, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2881 }, { "epoch": 1.808030112923463, "grad_norm": 1.8344682455062866, "learning_rate": 1.398401103153324e-07, "loss": 1.2441, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2882 }, { "epoch": 1.8086574654956085, "grad_norm": 1.9145503044128418, "learning_rate": 1.3893910361293422e-07, "loss": 1.2898, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2883 }, { "epoch": 1.809284818067754, "grad_norm": 1.8603219985961914, "learning_rate": 1.3804092597665186e-07, "loss": 1.1516, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2884 }, { "epoch": 1.8099121706398997, "grad_norm": 1.771806240081787, "learning_rate": 1.3714557848269777e-07, "loss": 1.3643, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2885 }, { "epoch": 1.8105395232120451, "grad_norm": 2.0314760208129883, "learning_rate": 1.3625306220389378e-07, "loss": 1.2896, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2886 }, { "epoch": 1.8111668757841906, "grad_norm": 1.787047028541565, "learning_rate": 1.3536337820966915e-07, "loss": 1.2796, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2887 }, { "epoch": 1.8117942283563364, "grad_norm": 1.9423733949661255, "learning_rate": 1.3447652756605894e-07, "loss": 1.122, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2888 }, { "epoch": 1.8124215809284818, "grad_norm": 1.8853334188461304, "learning_rate": 1.3359251133570422e-07, "loss": 1.3846, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2889 }, { "epoch": 1.8130489335006272, "grad_norm": 1.9998983144760132, "learning_rate": 1.3271133057784906e-07, "loss": 1.3012, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2890 }, { "epoch": 1.8136762860727729, "grad_norm": 1.855903148651123, "learning_rate": 1.3183298634834086e-07, "loss": 1.2702, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2891 }, { "epoch": 1.8143036386449185, "grad_norm": 2.0532174110412598, "learning_rate": 1.3095747969962686e-07, "loss": 1.1433, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2892 }, { "epoch": 1.814930991217064, "grad_norm": 1.7956229448318481, "learning_rate": 1.3008481168075571e-07, "loss": 1.1543, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2893 }, { "epoch": 1.8155583437892095, "grad_norm": 1.9425783157348633, "learning_rate": 1.2921498333737375e-07, "loss": 1.3422, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2894 }, { "epoch": 1.8161856963613552, "grad_norm": 1.777909278869629, "learning_rate": 1.283479957117248e-07, "loss": 1.317, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2895 }, { "epoch": 1.8168130489335006, "grad_norm": 1.7789652347564697, "learning_rate": 1.2748384984265004e-07, "loss": 1.3224, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2896 }, { "epoch": 1.817440401505646, "grad_norm": 1.7923486232757568, "learning_rate": 1.2662254676558406e-07, "loss": 1.2084, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2897 }, { "epoch": 1.8180677540777919, "grad_norm": 1.7949541807174683, "learning_rate": 1.2576408751255624e-07, "loss": 1.294, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2898 }, { "epoch": 1.8186951066499373, "grad_norm": 1.7665704488754272, "learning_rate": 1.2490847311218773e-07, "loss": 1.3085, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2899 }, { "epoch": 1.8193224592220827, "grad_norm": 1.8380930423736572, "learning_rate": 1.2405570458969173e-07, "loss": 1.3413, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2900 }, { "epoch": 1.8199498117942283, "grad_norm": 1.823759913444519, "learning_rate": 1.232057829668709e-07, "loss": 1.3157, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2901 }, { "epoch": 1.820577164366374, "grad_norm": 1.807655930519104, "learning_rate": 1.223587092621162e-07, "loss": 1.3335, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2902 }, { "epoch": 1.8212045169385194, "grad_norm": 1.7860008478164673, "learning_rate": 1.2151448449040759e-07, "loss": 1.3931, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2903 }, { "epoch": 1.821831869510665, "grad_norm": 0.9793686270713806, "learning_rate": 1.2067310966330958e-07, "loss": 1.1069, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2904 }, { "epoch": 1.8224592220828106, "grad_norm": 1.4274057149887085, "learning_rate": 1.1983458578897404e-07, "loss": 1.2951, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2905 }, { "epoch": 1.823086574654956, "grad_norm": 1.787569284439087, "learning_rate": 1.1899891387213425e-07, "loss": 1.1813, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2906 }, { "epoch": 1.8237139272271017, "grad_norm": 2.3724148273468018, "learning_rate": 1.1816609491410864e-07, "loss": 1.2176, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2907 }, { "epoch": 1.8243412797992473, "grad_norm": 1.8867779970169067, "learning_rate": 1.1733612991279536e-07, "loss": 1.2545, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2908 }, { "epoch": 1.8249686323713927, "grad_norm": 1.7058035135269165, "learning_rate": 1.1650901986267365e-07, "loss": 1.1629, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2909 }, { "epoch": 1.8255959849435381, "grad_norm": 1.5623539686203003, "learning_rate": 1.1568476575480186e-07, "loss": 1.1691, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2910 }, { "epoch": 1.8262233375156838, "grad_norm": 1.742311716079712, "learning_rate": 1.1486336857681635e-07, "loss": 1.2359, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2911 }, { "epoch": 1.8268506900878294, "grad_norm": 1.8960965871810913, "learning_rate": 1.1404482931293009e-07, "loss": 1.2743, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2912 }, { "epoch": 1.8274780426599748, "grad_norm": 1.8217061758041382, "learning_rate": 1.1322914894393188e-07, "loss": 1.3185, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2913 }, { "epoch": 1.8281053952321205, "grad_norm": 1.695849895477295, "learning_rate": 1.1241632844718464e-07, "loss": 1.2637, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2914 }, { "epoch": 1.828732747804266, "grad_norm": 1.9500839710235596, "learning_rate": 1.1160636879662456e-07, "loss": 1.2737, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2915 }, { "epoch": 1.8293601003764115, "grad_norm": 1.6682372093200684, "learning_rate": 1.1079927096275978e-07, "loss": 1.1241, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2916 }, { "epoch": 1.8299874529485571, "grad_norm": 1.7813045978546143, "learning_rate": 1.099950359126703e-07, "loss": 1.3649, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2917 }, { "epoch": 1.8306148055207028, "grad_norm": 1.7439370155334473, "learning_rate": 1.0919366461000447e-07, "loss": 1.1933, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2918 }, { "epoch": 1.8312421580928482, "grad_norm": 2.0448148250579834, "learning_rate": 1.0839515801498085e-07, "loss": 1.1745, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2919 }, { "epoch": 1.8318695106649936, "grad_norm": 1.9162275791168213, "learning_rate": 1.0759951708438382e-07, "loss": 1.2988, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2920 }, { "epoch": 1.8324968632371392, "grad_norm": 1.8003709316253662, "learning_rate": 1.0680674277156467e-07, "loss": 1.195, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2921 }, { "epoch": 1.8331242158092849, "grad_norm": 1.7913392782211304, "learning_rate": 1.0601683602644102e-07, "loss": 1.3184, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2922 }, { "epoch": 1.8337515683814303, "grad_norm": 1.3972041606903076, "learning_rate": 1.052297977954922e-07, "loss": 1.1933, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2923 }, { "epoch": 1.834378920953576, "grad_norm": 2.2087419033050537, "learning_rate": 1.0444562902176297e-07, "loss": 1.1391, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2924 }, { "epoch": 1.8350062735257215, "grad_norm": 1.5424485206604004, "learning_rate": 1.0366433064485759e-07, "loss": 1.1697, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2925 }, { "epoch": 1.835633626097867, "grad_norm": 1.494396686553955, "learning_rate": 1.0288590360094275e-07, "loss": 1.1096, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2926 }, { "epoch": 1.8362609786700126, "grad_norm": 2.128662586212158, "learning_rate": 1.0211034882274345e-07, "loss": 1.1278, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2927 }, { "epoch": 1.8368883312421582, "grad_norm": 1.8385462760925293, "learning_rate": 1.0133766723954358e-07, "loss": 1.2563, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2928 }, { "epoch": 1.8375156838143036, "grad_norm": 1.823840856552124, "learning_rate": 1.0056785977718448e-07, "loss": 1.3391, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2929 }, { "epoch": 1.838143036386449, "grad_norm": 1.6410809755325317, "learning_rate": 9.98009273580633e-08, "loss": 1.116, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2930 }, { "epoch": 1.8387703889585947, "grad_norm": 1.597440242767334, "learning_rate": 9.903687090113246e-08, "loss": 1.1979, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2931 }, { "epoch": 1.8393977415307403, "grad_norm": 1.8229918479919434, "learning_rate": 9.827569132189824e-08, "loss": 1.2552, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2932 }, { "epoch": 1.8400250941028857, "grad_norm": 1.5379549264907837, "learning_rate": 9.751738953241996e-08, "loss": 1.2338, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2933 }, { "epoch": 1.8406524466750314, "grad_norm": 2.0025312900543213, "learning_rate": 9.676196644130858e-08, "loss": 1.0986, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2934 }, { "epoch": 1.841279799247177, "grad_norm": 1.8701595067977905, "learning_rate": 9.600942295372562e-08, "loss": 1.2783, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2935 }, { "epoch": 1.8419071518193224, "grad_norm": 1.8206629753112793, "learning_rate": 9.525975997138254e-08, "loss": 1.2304, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2936 }, { "epoch": 1.842534504391468, "grad_norm": 1.8754552602767944, "learning_rate": 9.451297839253915e-08, "loss": 1.2556, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2937 }, { "epoch": 1.8431618569636137, "grad_norm": 1.714375376701355, "learning_rate": 9.3769079112003e-08, "loss": 1.1894, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2938 }, { "epoch": 1.843789209535759, "grad_norm": 1.7647253274917603, "learning_rate": 9.302806302112694e-08, "loss": 1.1737, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2939 }, { "epoch": 1.8444165621079045, "grad_norm": 1.630103349685669, "learning_rate": 9.228993100781097e-08, "loss": 1.2667, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2940 }, { "epoch": 1.8450439146800501, "grad_norm": 1.5752959251403809, "learning_rate": 9.155468395649764e-08, "loss": 1.2148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2941 }, { "epoch": 1.8456712672521958, "grad_norm": 1.9237995147705078, "learning_rate": 9.082232274817332e-08, "loss": 1.18, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2942 }, { "epoch": 1.8462986198243412, "grad_norm": 1.7361148595809937, "learning_rate": 9.00928482603669e-08, "loss": 1.306, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2943 }, { "epoch": 1.8469259723964868, "grad_norm": 1.6503828763961792, "learning_rate": 8.936626136714754e-08, "loss": 1.4332, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2944 }, { "epoch": 1.8475533249686324, "grad_norm": 2.4304773807525635, "learning_rate": 8.864256293912521e-08, "loss": 1.346, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2945 }, { "epoch": 1.8481806775407779, "grad_norm": 1.8197497129440308, "learning_rate": 8.792175384344848e-08, "loss": 1.0988, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2946 }, { "epoch": 1.8488080301129235, "grad_norm": 1.7978287935256958, "learning_rate": 8.720383494380397e-08, "loss": 1.282, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2947 }, { "epoch": 1.8494353826850691, "grad_norm": 1.7161986827850342, "learning_rate": 8.648880710041496e-08, "loss": 1.2886, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2948 }, { "epoch": 1.8500627352572145, "grad_norm": 1.6135600805282593, "learning_rate": 8.577667117004084e-08, "loss": 1.1518, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2949 }, { "epoch": 1.85069008782936, "grad_norm": 1.6712685823440552, "learning_rate": 8.506742800597601e-08, "loss": 1.3558, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2950 }, { "epoch": 1.8513174404015058, "grad_norm": 1.5128730535507202, "learning_rate": 8.436107845804842e-08, "loss": 1.1955, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2951 }, { "epoch": 1.8519447929736512, "grad_norm": 1.6145243644714355, "learning_rate": 8.365762337261885e-08, "loss": 1.2239, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2952 }, { "epoch": 1.8525721455457966, "grad_norm": 1.9505369663238525, "learning_rate": 8.295706359257998e-08, "loss": 1.1648, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2953 }, { "epoch": 1.8531994981179423, "grad_norm": 1.492814302444458, "learning_rate": 8.225939995735593e-08, "loss": 1.2614, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2954 }, { "epoch": 1.853826850690088, "grad_norm": 1.5193251371383667, "learning_rate": 8.15646333028991e-08, "loss": 1.3263, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2955 }, { "epoch": 1.8544542032622333, "grad_norm": 1.7506705522537231, "learning_rate": 8.087276446169162e-08, "loss": 1.3393, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2956 }, { "epoch": 1.855081555834379, "grad_norm": 1.7187601327896118, "learning_rate": 8.018379426274397e-08, "loss": 1.2017, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2957 }, { "epoch": 1.8557089084065246, "grad_norm": 1.3737879991531372, "learning_rate": 7.949772353159191e-08, "loss": 1.104, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2958 }, { "epoch": 1.85633626097867, "grad_norm": 1.9042035341262817, "learning_rate": 7.881455309029895e-08, "loss": 1.216, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2959 }, { "epoch": 1.8569636135508154, "grad_norm": 1.4430084228515625, "learning_rate": 7.813428375745142e-08, "loss": 1.2731, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2960 }, { "epoch": 1.8575909661229613, "grad_norm": 1.2014703750610352, "learning_rate": 7.745691634816032e-08, "loss": 1.1147, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2961 }, { "epoch": 1.8582183186951067, "grad_norm": 1.566699504852295, "learning_rate": 7.678245167406061e-08, "loss": 1.1803, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2962 }, { "epoch": 1.858845671267252, "grad_norm": 1.5141801834106445, "learning_rate": 7.611089054330723e-08, "loss": 1.1781, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2963 }, { "epoch": 1.8594730238393977, "grad_norm": 1.6204683780670166, "learning_rate": 7.544223376057702e-08, "loss": 1.1162, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2964 }, { "epoch": 1.8601003764115434, "grad_norm": 1.9046368598937988, "learning_rate": 7.477648212706746e-08, "loss": 1.33, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2965 }, { "epoch": 1.8607277289836888, "grad_norm": 1.694454550743103, "learning_rate": 7.411363644049346e-08, "loss": 1.0354, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2966 }, { "epoch": 1.8613550815558344, "grad_norm": 1.9594048261642456, "learning_rate": 7.345369749508891e-08, "loss": 1.2649, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2967 }, { "epoch": 1.86198243412798, "grad_norm": 1.9656914472579956, "learning_rate": 7.279666608160458e-08, "loss": 1.2194, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2968 }, { "epoch": 1.8626097867001254, "grad_norm": 1.7213078737258911, "learning_rate": 7.214254298730794e-08, "loss": 1.3395, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2969 }, { "epoch": 1.8632371392722709, "grad_norm": 1.6851757764816284, "learning_rate": 7.149132899598066e-08, "loss": 1.2267, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2970 }, { "epoch": 1.8638644918444167, "grad_norm": 1.0846415758132935, "learning_rate": 7.084302488791994e-08, "loss": 1.1227, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2971 }, { "epoch": 1.8644918444165621, "grad_norm": 1.6302671432495117, "learning_rate": 7.019763143993441e-08, "loss": 1.2335, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2972 }, { "epoch": 1.8651191969887075, "grad_norm": 1.5246875286102295, "learning_rate": 6.955514942534742e-08, "loss": 1.3178, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2973 }, { "epoch": 1.8657465495608532, "grad_norm": 1.6435633897781372, "learning_rate": 6.891557961399175e-08, "loss": 1.3119, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2974 }, { "epoch": 1.8663739021329988, "grad_norm": 1.7738655805587769, "learning_rate": 6.827892277221193e-08, "loss": 1.2928, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2975 }, { "epoch": 1.8670012547051442, "grad_norm": 1.7210484743118286, "learning_rate": 6.764517966286188e-08, "loss": 1.2324, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2976 }, { "epoch": 1.8676286072772899, "grad_norm": 1.6799976825714111, "learning_rate": 6.701435104530363e-08, "loss": 1.1218, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2977 }, { "epoch": 1.8682559598494355, "grad_norm": 1.6705591678619385, "learning_rate": 6.638643767540837e-08, "loss": 1.1626, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2978 }, { "epoch": 1.868883312421581, "grad_norm": 1.8721472024917603, "learning_rate": 6.576144030555259e-08, "loss": 1.3582, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2979 }, { "epoch": 1.8695106649937263, "grad_norm": 1.9409172534942627, "learning_rate": 6.513935968461948e-08, "loss": 1.3134, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2980 }, { "epoch": 1.8701380175658722, "grad_norm": 1.959789752960205, "learning_rate": 6.45201965579978e-08, "loss": 1.2429, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2981 }, { "epoch": 1.8707653701380176, "grad_norm": 1.5382444858551025, "learning_rate": 6.39039516675799e-08, "loss": 1.1036, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2982 }, { "epoch": 1.871392722710163, "grad_norm": 1.5872652530670166, "learning_rate": 6.329062575176182e-08, "loss": 1.1977, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2983 }, { "epoch": 1.8720200752823086, "grad_norm": 2.0065999031066895, "learning_rate": 6.268021954544095e-08, "loss": 1.1706, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2984 }, { "epoch": 1.8726474278544543, "grad_norm": 1.7338385581970215, "learning_rate": 6.207273378001837e-08, "loss": 1.4278, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2985 }, { "epoch": 1.8732747804265997, "grad_norm": 1.912854552268982, "learning_rate": 6.14681691833935e-08, "loss": 1.2516, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2986 }, { "epoch": 1.8739021329987453, "grad_norm": 1.60573148727417, "learning_rate": 6.086652647996738e-08, "loss": 1.334, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2987 }, { "epoch": 1.874529485570891, "grad_norm": 2.0827462673187256, "learning_rate": 6.026780639063895e-08, "loss": 1.1661, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2988 }, { "epoch": 1.8751568381430364, "grad_norm": 1.6736165285110474, "learning_rate": 5.967200963280545e-08, "loss": 1.3655, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2989 }, { "epoch": 1.875784190715182, "grad_norm": 1.7067066431045532, "learning_rate": 5.9079136920361376e-08, "loss": 1.1578, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2990 }, { "epoch": 1.8764115432873276, "grad_norm": 1.3567057847976685, "learning_rate": 5.848918896369765e-08, "loss": 1.1204, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2991 }, { "epoch": 1.877038895859473, "grad_norm": 2.0910308361053467, "learning_rate": 5.7902166469701036e-08, "loss": 1.2968, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2992 }, { "epoch": 1.8776662484316184, "grad_norm": 1.7153366804122925, "learning_rate": 5.731807014175195e-08, "loss": 1.2327, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2993 }, { "epoch": 1.878293601003764, "grad_norm": 1.6810442209243774, "learning_rate": 5.673690067972554e-08, "loss": 1.1654, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2994 }, { "epoch": 1.8789209535759097, "grad_norm": 1.9211241006851196, "learning_rate": 5.615865877998977e-08, "loss": 1.1672, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2995 }, { "epoch": 1.8795483061480551, "grad_norm": 2.5992534160614014, "learning_rate": 5.558334513540403e-08, "loss": 1.1507, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2996 }, { "epoch": 1.8801756587202008, "grad_norm": 2.0081636905670166, "learning_rate": 5.5010960435320224e-08, "loss": 1.2349, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2997 }, { "epoch": 1.8808030112923464, "grad_norm": 1.8557283878326416, "learning_rate": 5.444150536558002e-08, "loss": 1.2541, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2998 }, { "epoch": 1.8814303638644918, "grad_norm": 2.5331225395202637, "learning_rate": 5.3874980608514535e-08, "loss": 1.2488, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 2999 }, { "epoch": 1.8820577164366374, "grad_norm": 1.7225735187530518, "learning_rate": 5.3311386842944125e-08, "loss": 1.279, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3000 }, { "epoch": 1.882685069008783, "grad_norm": 1.9744964838027954, "learning_rate": 5.275072474417719e-08, "loss": 1.1967, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3001 }, { "epoch": 1.8833124215809285, "grad_norm": 1.9109421968460083, "learning_rate": 5.2192994984009425e-08, "loss": 1.3013, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3002 }, { "epoch": 1.883939774153074, "grad_norm": 1.41983962059021, "learning_rate": 5.163819823072208e-08, "loss": 1.1192, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3003 }, { "epoch": 1.8845671267252195, "grad_norm": 1.7811996936798096, "learning_rate": 5.108633514908368e-08, "loss": 1.2609, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3004 }, { "epoch": 1.8851944792973652, "grad_norm": 1.9444513320922852, "learning_rate": 5.053740640034582e-08, "loss": 1.2662, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3005 }, { "epoch": 1.8858218318695106, "grad_norm": 2.0892276763916016, "learning_rate": 4.999141264224544e-08, "loss": 1.2915, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3006 }, { "epoch": 1.8864491844416562, "grad_norm": 1.5862740278244019, "learning_rate": 4.944835452900199e-08, "loss": 1.1375, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3007 }, { "epoch": 1.8870765370138018, "grad_norm": 1.6205672025680542, "learning_rate": 4.890823271131745e-08, "loss": 1.2271, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3008 }, { "epoch": 1.8877038895859473, "grad_norm": 1.8633159399032593, "learning_rate": 4.8371047836375806e-08, "loss": 1.1729, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3009 }, { "epoch": 1.888331242158093, "grad_norm": 1.459846019744873, "learning_rate": 4.783680054784162e-08, "loss": 1.2904, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3010 }, { "epoch": 1.8889585947302385, "grad_norm": 1.7166321277618408, "learning_rate": 4.730549148586006e-08, "loss": 1.1306, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3011 }, { "epoch": 1.889585947302384, "grad_norm": 1.547766089439392, "learning_rate": 4.677712128705464e-08, "loss": 1.1716, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3012 }, { "epoch": 1.8902132998745294, "grad_norm": 1.8805586099624634, "learning_rate": 4.6251690584528665e-08, "loss": 1.2843, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3013 }, { "epoch": 1.890840652446675, "grad_norm": 1.8120684623718262, "learning_rate": 4.5729200007862686e-08, "loss": 1.208, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3014 }, { "epoch": 1.8914680050188206, "grad_norm": 1.711977481842041, "learning_rate": 4.5209650183114514e-08, "loss": 1.2166, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3015 }, { "epoch": 1.892095357590966, "grad_norm": 1.627962589263916, "learning_rate": 4.469304173281785e-08, "loss": 1.1748, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3016 }, { "epoch": 1.8927227101631117, "grad_norm": 1.7329778671264648, "learning_rate": 4.417937527598226e-08, "loss": 1.2154, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3017 }, { "epoch": 1.8933500627352573, "grad_norm": 1.8263328075408936, "learning_rate": 4.3668651428092625e-08, "loss": 1.2522, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3018 }, { "epoch": 1.8939774153074027, "grad_norm": 2.0282092094421387, "learning_rate": 4.316087080110748e-08, "loss": 1.2865, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3019 }, { "epoch": 1.8946047678795483, "grad_norm": 1.613545298576355, "learning_rate": 4.2656034003458746e-08, "loss": 1.1478, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3020 }, { "epoch": 1.895232120451694, "grad_norm": 1.3981359004974365, "learning_rate": 4.215414164005116e-08, "loss": 1.1478, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3021 }, { "epoch": 1.8958594730238394, "grad_norm": 1.705987811088562, "learning_rate": 4.165519431226117e-08, "loss": 1.0804, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3022 }, { "epoch": 1.8964868255959848, "grad_norm": 1.808300256729126, "learning_rate": 4.115919261793638e-08, "loss": 1.2832, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3023 }, { "epoch": 1.8971141781681304, "grad_norm": 2.1151375770568848, "learning_rate": 4.0666137151395277e-08, "loss": 1.2959, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3024 }, { "epoch": 1.897741530740276, "grad_norm": 1.6511573791503906, "learning_rate": 4.017602850342584e-08, "loss": 1.4428, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3025 }, { "epoch": 1.8983688833124215, "grad_norm": 1.432206630706787, "learning_rate": 3.968886726128524e-08, "loss": 1.1333, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3026 }, { "epoch": 1.8989962358845671, "grad_norm": 1.6224288940429688, "learning_rate": 3.920465400869877e-08, "loss": 1.2393, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3027 }, { "epoch": 1.8996235884567128, "grad_norm": 1.6806602478027344, "learning_rate": 3.872338932585984e-08, "loss": 1.3892, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3028 }, { "epoch": 1.9002509410288582, "grad_norm": 2.271575927734375, "learning_rate": 3.8245073789427986e-08, "loss": 1.1943, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3029 }, { "epoch": 1.9008782936010038, "grad_norm": 1.6886036396026611, "learning_rate": 3.7769707972530046e-08, "loss": 1.2705, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3030 }, { "epoch": 1.9015056461731494, "grad_norm": 2.129394292831421, "learning_rate": 3.729729244475816e-08, "loss": 1.154, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3031 }, { "epoch": 1.9021329987452948, "grad_norm": 1.8195149898529053, "learning_rate": 3.682782777216898e-08, "loss": 1.2528, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3032 }, { "epoch": 1.9027603513174403, "grad_norm": 1.6862359046936035, "learning_rate": 3.6361314517283085e-08, "loss": 1.2078, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3033 }, { "epoch": 1.9033877038895861, "grad_norm": 1.1818653345108032, "learning_rate": 3.589775323908612e-08, "loss": 1.1659, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3034 }, { "epoch": 1.9040150564617315, "grad_norm": 1.4935318231582642, "learning_rate": 3.543714449302488e-08, "loss": 1.1956, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3035 }, { "epoch": 1.904642409033877, "grad_norm": 1.7930165529251099, "learning_rate": 3.497948883100927e-08, "loss": 1.2859, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3036 }, { "epoch": 1.9052697616060226, "grad_norm": 1.831816554069519, "learning_rate": 3.4524786801411195e-08, "loss": 1.2086, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3037 }, { "epoch": 1.9058971141781682, "grad_norm": 1.8915395736694336, "learning_rate": 3.407303894906205e-08, "loss": 1.2686, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3038 }, { "epoch": 1.9065244667503136, "grad_norm": 1.8743354082107544, "learning_rate": 3.362424581525498e-08, "loss": 1.4302, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3039 }, { "epoch": 1.9071518193224593, "grad_norm": 1.7570542097091675, "learning_rate": 3.317840793774174e-08, "loss": 1.3382, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3040 }, { "epoch": 1.9077791718946049, "grad_norm": 1.849229335784912, "learning_rate": 3.273552585073364e-08, "loss": 1.1924, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3041 }, { "epoch": 1.9084065244667503, "grad_norm": 1.9815819263458252, "learning_rate": 3.229560008490007e-08, "loss": 1.1692, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3042 }, { "epoch": 1.9090338770388957, "grad_norm": 1.822123646736145, "learning_rate": 3.18586311673677e-08, "loss": 1.2147, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3043 }, { "epoch": 1.9096612296110416, "grad_norm": 1.6399743556976318, "learning_rate": 3.142461962172105e-08, "loss": 1.245, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3044 }, { "epoch": 1.910288582183187, "grad_norm": 1.9598350524902344, "learning_rate": 3.099356596800024e-08, "loss": 1.1548, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3045 }, { "epoch": 1.9109159347553324, "grad_norm": 1.7600895166397095, "learning_rate": 3.056547072270183e-08, "loss": 1.2108, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3046 }, { "epoch": 1.911543287327478, "grad_norm": 1.9633840322494507, "learning_rate": 3.014033439877745e-08, "loss": 1.2317, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3047 }, { "epoch": 1.9121706398996237, "grad_norm": 1.5512139797210693, "learning_rate": 2.9718157505633226e-08, "loss": 1.3363, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3048 }, { "epoch": 1.912797992471769, "grad_norm": 1.947986364364624, "learning_rate": 2.9298940549128962e-08, "loss": 1.2215, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3049 }, { "epoch": 1.9134253450439147, "grad_norm": 1.9563969373703003, "learning_rate": 2.8882684031577845e-08, "loss": 1.2041, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3050 }, { "epoch": 1.9140526976160603, "grad_norm": 1.6992594003677368, "learning_rate": 2.846938845174646e-08, "loss": 1.3089, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3051 }, { "epoch": 1.9146800501882058, "grad_norm": 1.580871343612671, "learning_rate": 2.805905430485256e-08, "loss": 1.1649, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3052 }, { "epoch": 1.9153074027603512, "grad_norm": 1.5608967542648315, "learning_rate": 2.7651682082566743e-08, "loss": 1.4416, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3053 }, { "epoch": 1.915934755332497, "grad_norm": 1.9078105688095093, "learning_rate": 2.7247272273009108e-08, "loss": 1.3189, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3054 }, { "epoch": 1.9165621079046424, "grad_norm": 1.723378300666809, "learning_rate": 2.6845825360751198e-08, "loss": 1.275, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3055 }, { "epoch": 1.9171894604767878, "grad_norm": 1.582735300064087, "learning_rate": 2.6447341826814077e-08, "loss": 1.4529, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3056 }, { "epoch": 1.9178168130489335, "grad_norm": 1.8323571681976318, "learning_rate": 2.605182214866747e-08, "loss": 1.2575, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3057 }, { "epoch": 1.9184441656210791, "grad_norm": 1.9233479499816895, "learning_rate": 2.5659266800230897e-08, "loss": 1.3383, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3058 }, { "epoch": 1.9190715181932245, "grad_norm": 1.333604335784912, "learning_rate": 2.5269676251870878e-08, "loss": 1.1025, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3059 }, { "epoch": 1.9196988707653702, "grad_norm": 1.752911925315857, "learning_rate": 2.4883050970402334e-08, "loss": 1.2992, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3060 }, { "epoch": 1.9203262233375158, "grad_norm": 1.5046241283416748, "learning_rate": 2.4499391419086083e-08, "loss": 1.2568, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3061 }, { "epoch": 1.9209535759096612, "grad_norm": 1.6993424892425537, "learning_rate": 2.411869805763023e-08, "loss": 1.2413, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3062 }, { "epoch": 1.9215809284818066, "grad_norm": 1.9174491167068481, "learning_rate": 2.3740971342189056e-08, "loss": 1.1423, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3063 }, { "epoch": 1.9222082810539525, "grad_norm": 1.737568736076355, "learning_rate": 2.33662117253608e-08, "loss": 1.1946, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3064 }, { "epoch": 1.9228356336260979, "grad_norm": 1.9473689794540405, "learning_rate": 2.2994419656189594e-08, "loss": 1.3369, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3065 }, { "epoch": 1.9234629861982433, "grad_norm": 1.6701767444610596, "learning_rate": 2.262559558016325e-08, "loss": 1.3142, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3066 }, { "epoch": 1.924090338770389, "grad_norm": 1.6051278114318848, "learning_rate": 2.225973993921382e-08, "loss": 1.4871, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3067 }, { "epoch": 1.9247176913425346, "grad_norm": 1.8993639945983887, "learning_rate": 2.1896853171715916e-08, "loss": 1.2304, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3068 }, { "epoch": 1.92534504391468, "grad_norm": 1.937630295753479, "learning_rate": 2.1536935712486993e-08, "loss": 1.1656, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3069 }, { "epoch": 1.9259723964868256, "grad_norm": 1.7782254219055176, "learning_rate": 2.117998799278709e-08, "loss": 1.3517, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3070 }, { "epoch": 1.9265997490589712, "grad_norm": 1.7657594680786133, "learning_rate": 2.0826010440317125e-08, "loss": 1.1481, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3071 }, { "epoch": 1.9272271016311167, "grad_norm": 1.789623737335205, "learning_rate": 2.0475003479219492e-08, "loss": 1.2019, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3072 }, { "epoch": 1.927854454203262, "grad_norm": 1.2043588161468506, "learning_rate": 2.01269675300772e-08, "loss": 1.2038, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3073 }, { "epoch": 1.928481806775408, "grad_norm": 1.2602628469467163, "learning_rate": 1.978190300991334e-08, "loss": 1.0944, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3074 }, { "epoch": 1.9291091593475533, "grad_norm": 1.8765314817428589, "learning_rate": 1.94398103321905e-08, "loss": 1.2142, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3075 }, { "epoch": 1.9297365119196987, "grad_norm": 1.8176100254058838, "learning_rate": 1.9100689906809965e-08, "loss": 1.1382, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3076 }, { "epoch": 1.9303638644918444, "grad_norm": 1.77439546585083, "learning_rate": 1.876454214011253e-08, "loss": 1.3485, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3077 }, { "epoch": 1.93099121706399, "grad_norm": 1.075534701347351, "learning_rate": 1.8431367434876002e-08, "loss": 1.0438, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3078 }, { "epoch": 1.9316185696361354, "grad_norm": 1.7188340425491333, "learning_rate": 1.8101166190316876e-08, "loss": 1.1317, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3079 }, { "epoch": 1.932245922208281, "grad_norm": 1.5703520774841309, "learning_rate": 1.777393880208811e-08, "loss": 1.3624, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3080 }, { "epoch": 1.9328732747804267, "grad_norm": 1.767533779144287, "learning_rate": 1.744968566227939e-08, "loss": 1.0852, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3081 }, { "epoch": 1.933500627352572, "grad_norm": 1.7214370965957642, "learning_rate": 1.7128407159416604e-08, "loss": 1.2785, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3082 }, { "epoch": 1.9341279799247177, "grad_norm": 1.7513182163238525, "learning_rate": 1.6810103678462088e-08, "loss": 1.1782, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3083 }, { "epoch": 1.9347553324968634, "grad_norm": 1.5604854822158813, "learning_rate": 1.6494775600812418e-08, "loss": 1.2548, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3084 }, { "epoch": 1.9353826850690088, "grad_norm": 1.8918966054916382, "learning_rate": 1.6182423304299255e-08, "loss": 1.2621, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3085 }, { "epoch": 1.9360100376411542, "grad_norm": 1.689467430114746, "learning_rate": 1.5873047163189326e-08, "loss": 1.2018, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3086 }, { "epoch": 1.9366373902132998, "grad_norm": 1.5074502229690552, "learning_rate": 1.556664754818249e-08, "loss": 1.2346, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3087 }, { "epoch": 1.9372647427854455, "grad_norm": 1.5012305974960327, "learning_rate": 1.5263224826412292e-08, "loss": 1.1775, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3088 }, { "epoch": 1.9378920953575909, "grad_norm": 1.8126957416534424, "learning_rate": 1.496277936144541e-08, "loss": 1.4159, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3089 }, { "epoch": 1.9385194479297365, "grad_norm": 1.548186182975769, "learning_rate": 1.4665311513280822e-08, "loss": 1.2268, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3090 }, { "epoch": 1.9391468005018822, "grad_norm": 1.7500059604644775, "learning_rate": 1.4370821638350353e-08, "loss": 1.1917, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3091 }, { "epoch": 1.9397741530740276, "grad_norm": 1.6493536233901978, "learning_rate": 1.4079310089516741e-08, "loss": 1.0922, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3092 }, { "epoch": 1.9404015056461732, "grad_norm": 1.9024580717086792, "learning_rate": 1.3790777216074747e-08, "loss": 1.1731, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3093 }, { "epoch": 1.9410288582183188, "grad_norm": 1.912346601486206, "learning_rate": 1.3505223363749487e-08, "loss": 1.2582, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3094 }, { "epoch": 1.9416562107904642, "grad_norm": 1.685202717781067, "learning_rate": 1.3222648874696986e-08, "loss": 1.0968, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3095 }, { "epoch": 1.9422835633626097, "grad_norm": 1.895370602607727, "learning_rate": 1.294305408750307e-08, "loss": 1.2619, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3096 }, { "epoch": 1.9429109159347553, "grad_norm": 1.3072205781936646, "learning_rate": 1.2666439337183089e-08, "loss": 1.1337, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3097 }, { "epoch": 1.943538268506901, "grad_norm": 1.7828336954116821, "learning_rate": 1.2392804955181915e-08, "loss": 1.2324, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3098 }, { "epoch": 1.9441656210790463, "grad_norm": 1.8671600818634033, "learning_rate": 1.2122151269373383e-08, "loss": 1.2573, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3099 }, { "epoch": 1.944792973651192, "grad_norm": 1.743040919303894, "learning_rate": 1.185447860405975e-08, "loss": 1.2609, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3100 }, { "epoch": 1.9454203262233376, "grad_norm": 1.7714751958847046, "learning_rate": 1.158978727997112e-08, "loss": 1.1616, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3101 }, { "epoch": 1.946047678795483, "grad_norm": 1.590544581413269, "learning_rate": 1.1328077614265465e-08, "loss": 1.0432, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3102 }, { "epoch": 1.9466750313676286, "grad_norm": 2.0189173221588135, "learning_rate": 1.1069349920528327e-08, "loss": 1.3164, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3103 }, { "epoch": 1.9473023839397743, "grad_norm": 1.8704915046691895, "learning_rate": 1.081360450877117e-08, "loss": 1.186, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3104 }, { "epoch": 1.9479297365119197, "grad_norm": 1.8126301765441895, "learning_rate": 1.0560841685433864e-08, "loss": 1.1901, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3105 }, { "epoch": 1.948557089084065, "grad_norm": 1.7933433055877686, "learning_rate": 1.0311061753380536e-08, "loss": 1.1437, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3106 }, { "epoch": 1.9491844416562107, "grad_norm": 1.9376479387283325, "learning_rate": 1.006426501190233e-08, "loss": 1.3331, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3107 }, { "epoch": 1.9498117942283564, "grad_norm": 1.83939790725708, "learning_rate": 9.820451756715754e-09, "loss": 1.3347, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3108 }, { "epoch": 1.9504391468005018, "grad_norm": 1.802091360092163, "learning_rate": 9.579622279962397e-09, "loss": 1.2383, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3109 }, { "epoch": 1.9510664993726474, "grad_norm": 1.7190696001052856, "learning_rate": 9.341776870208096e-09, "loss": 1.2428, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3110 }, { "epoch": 1.951693851944793, "grad_norm": 1.8278087377548218, "learning_rate": 9.106915812443772e-09, "loss": 1.2512, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3111 }, { "epoch": 1.9523212045169385, "grad_norm": 2.022813320159912, "learning_rate": 8.875039388084317e-09, "loss": 1.3083, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3112 }, { "epoch": 1.952948557089084, "grad_norm": 1.781859278678894, "learning_rate": 8.646147874968037e-09, "loss": 1.2895, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3113 }, { "epoch": 1.9535759096612297, "grad_norm": 1.8502413034439087, "learning_rate": 8.420241547356933e-09, "loss": 1.3009, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3114 }, { "epoch": 1.9542032622333751, "grad_norm": 1.5541293621063232, "learning_rate": 8.197320675936148e-09, "loss": 1.1714, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3115 }, { "epoch": 1.9548306148055206, "grad_norm": 1.9019731283187866, "learning_rate": 7.977385527813963e-09, "loss": 1.16, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3116 }, { "epoch": 1.9554579673776662, "grad_norm": 1.3666876554489136, "learning_rate": 7.760436366519853e-09, "loss": 1.0896, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3117 }, { "epoch": 1.9560853199498118, "grad_norm": 0.9233667254447937, "learning_rate": 7.546473452006708e-09, "loss": 1.0859, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3118 }, { "epoch": 1.9567126725219572, "grad_norm": 1.7889835834503174, "learning_rate": 7.335497040648898e-09, "loss": 1.2028, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3119 }, { "epoch": 1.9573400250941029, "grad_norm": 4.186465740203857, "learning_rate": 7.127507385241983e-09, "loss": 1.2708, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3120 }, { "epoch": 1.9579673776662485, "grad_norm": 1.7843849658966064, "learning_rate": 6.922504735002999e-09, "loss": 1.1866, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3121 }, { "epoch": 1.958594730238394, "grad_norm": 1.6640843152999878, "learning_rate": 6.7204893355696245e-09, "loss": 1.2275, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3122 }, { "epoch": 1.9592220828105396, "grad_norm": 2.044402599334717, "learning_rate": 6.5214614290010085e-09, "loss": 1.2866, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3123 }, { "epoch": 1.9598494353826852, "grad_norm": 1.519188642501831, "learning_rate": 6.325421253775277e-09, "loss": 1.2247, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3124 }, { "epoch": 1.9604767879548306, "grad_norm": 1.9582196474075317, "learning_rate": 6.1323690447917525e-09, "loss": 1.2913, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3125 }, { "epoch": 1.961104140526976, "grad_norm": 2.0063629150390625, "learning_rate": 5.942305033369289e-09, "loss": 1.2017, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3126 }, { "epoch": 1.9617314930991219, "grad_norm": 1.6914819478988647, "learning_rate": 5.755229447245436e-09, "loss": 1.2231, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3127 }, { "epoch": 1.9623588456712673, "grad_norm": 1.9196043014526367, "learning_rate": 5.5711425105781096e-09, "loss": 1.2322, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3128 }, { "epoch": 1.9629861982434127, "grad_norm": 1.8479219675064087, "learning_rate": 5.390044443943365e-09, "loss": 1.2172, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3129 }, { "epoch": 1.9636135508155583, "grad_norm": 1.501006841659546, "learning_rate": 5.211935464336238e-09, "loss": 1.0868, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3130 }, { "epoch": 1.964240903387704, "grad_norm": 1.5361695289611816, "learning_rate": 5.036815785170179e-09, "loss": 1.0671, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3131 }, { "epoch": 1.9648682559598494, "grad_norm": 1.5813148021697998, "learning_rate": 4.864685616276788e-09, "loss": 1.3122, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3132 }, { "epoch": 1.965495608531995, "grad_norm": 1.9053575992584229, "learning_rate": 4.695545163905524e-09, "loss": 1.2872, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3133 }, { "epoch": 1.9661229611041406, "grad_norm": 1.71331787109375, "learning_rate": 4.529394630723438e-09, "loss": 1.2103, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3134 }, { "epoch": 1.966750313676286, "grad_norm": 2.0272982120513916, "learning_rate": 4.366234215815446e-09, "loss": 1.3642, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3135 }, { "epoch": 1.9673776662484315, "grad_norm": 1.7172526121139526, "learning_rate": 4.206064114682939e-09, "loss": 1.1586, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3136 }, { "epoch": 1.9680050188205773, "grad_norm": 1.4065996408462524, "learning_rate": 4.0488845192449e-09, "loss": 1.2617, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3137 }, { "epoch": 1.9686323713927227, "grad_norm": 1.587286353111267, "learning_rate": 3.894695617836786e-09, "loss": 1.285, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3138 }, { "epoch": 1.9692597239648681, "grad_norm": 1.8322442770004272, "learning_rate": 3.743497595210255e-09, "loss": 1.2328, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3139 }, { "epoch": 1.9698870765370138, "grad_norm": 1.8742417097091675, "learning_rate": 3.5952906325339988e-09, "loss": 1.2586, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3140 }, { "epoch": 1.9705144291091594, "grad_norm": 1.5411434173583984, "learning_rate": 3.4500749073920757e-09, "loss": 1.1538, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3141 }, { "epoch": 1.9711417816813048, "grad_norm": 13.465494155883789, "learning_rate": 3.3078505937844674e-09, "loss": 1.2148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3142 }, { "epoch": 1.9717691342534505, "grad_norm": 1.985593318939209, "learning_rate": 3.168617862127077e-09, "loss": 1.1492, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3143 }, { "epoch": 1.972396486825596, "grad_norm": 1.571092963218689, "learning_rate": 3.0323768792508978e-09, "loss": 1.1533, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3144 }, { "epoch": 1.9730238393977415, "grad_norm": 1.5139721632003784, "learning_rate": 2.8991278084025687e-09, "loss": 1.2752, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3145 }, { "epoch": 1.973651191969887, "grad_norm": 1.6269372701644897, "learning_rate": 2.7688708092435403e-09, "loss": 1.1284, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3146 }, { "epoch": 1.9742785445420328, "grad_norm": 1.8731541633605957, "learning_rate": 2.641606037850353e-09, "loss": 1.2279, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3147 }, { "epoch": 1.9749058971141782, "grad_norm": 1.8516762256622314, "learning_rate": 2.5173336467135266e-09, "loss": 1.1847, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3148 }, { "epoch": 1.9755332496863236, "grad_norm": 1.9598283767700195, "learning_rate": 2.3960537847383946e-09, "loss": 1.1979, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3149 }, { "epoch": 1.9761606022584692, "grad_norm": 1.7261936664581299, "learning_rate": 2.2777665972453788e-09, "loss": 1.2985, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3150 }, { "epoch": 1.9767879548306149, "grad_norm": 1.8943806886672974, "learning_rate": 2.1624722259674934e-09, "loss": 1.2649, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3151 }, { "epoch": 1.9774153074027603, "grad_norm": 1.5255440473556519, "learning_rate": 2.050170809053398e-09, "loss": 1.3109, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3152 }, { "epoch": 1.978042659974906, "grad_norm": 1.7608319520950317, "learning_rate": 1.9408624810640654e-09, "loss": 1.2154, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3153 }, { "epoch": 1.9786700125470515, "grad_norm": 1.6721044778823853, "learning_rate": 1.834547372975004e-09, "loss": 1.1618, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3154 }, { "epoch": 1.979297365119197, "grad_norm": 1.690503716468811, "learning_rate": 1.7312256121748695e-09, "loss": 1.2142, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3155 }, { "epoch": 1.9799247176913424, "grad_norm": 1.3389973640441895, "learning_rate": 1.6308973224654634e-09, "loss": 1.1176, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3156 }, { "epoch": 1.9805520702634882, "grad_norm": 1.7939364910125732, "learning_rate": 1.5335626240622903e-09, "loss": 1.3278, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3157 }, { "epoch": 1.9811794228356336, "grad_norm": 2.0031301975250244, "learning_rate": 1.4392216335934462e-09, "loss": 1.2844, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3158 }, { "epoch": 1.981806775407779, "grad_norm": 1.8132704496383667, "learning_rate": 1.3478744640998963e-09, "loss": 1.3131, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3159 }, { "epoch": 1.9824341279799247, "grad_norm": 1.3770238161087036, "learning_rate": 1.2595212250357536e-09, "loss": 1.1148, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3160 }, { "epoch": 1.9830614805520703, "grad_norm": 1.5473389625549316, "learning_rate": 1.1741620222671667e-09, "loss": 1.0943, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3161 }, { "epoch": 1.9836888331242157, "grad_norm": 1.7902740240097046, "learning_rate": 1.0917969580734322e-09, "loss": 1.0838, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3162 }, { "epoch": 1.9843161856963614, "grad_norm": 1.8456615209579468, "learning_rate": 1.0124261311453276e-09, "loss": 1.305, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3163 }, { "epoch": 1.984943538268507, "grad_norm": 1.8739662170410156, "learning_rate": 9.360496365870552e-10, "loss": 1.2175, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3164 }, { "epoch": 1.9855708908406524, "grad_norm": 1.9121259450912476, "learning_rate": 8.62667565913744e-10, "loss": 1.2805, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3165 }, { "epoch": 1.986198243412798, "grad_norm": 1.4620251655578613, "learning_rate": 7.922800070536696e-10, "loss": 1.3768, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3166 }, { "epoch": 1.9868255959849437, "grad_norm": 1.821302890777588, "learning_rate": 7.248870443460342e-10, "loss": 1.2826, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3167 }, { "epoch": 1.987452948557089, "grad_norm": 2.1709654331207275, "learning_rate": 6.604887585426323e-10, "loss": 1.227, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3168 }, { "epoch": 1.9880803011292345, "grad_norm": 1.6179709434509277, "learning_rate": 5.990852268064617e-10, "loss": 1.3695, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3169 }, { "epoch": 1.9887076537013801, "grad_norm": 1.6689754724502563, "learning_rate": 5.406765227122801e-10, "loss": 1.3144, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3170 }, { "epoch": 1.9893350062735258, "grad_norm": 1.9300893545150757, "learning_rate": 4.852627162468814e-10, "loss": 1.1671, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3171 }, { "epoch": 1.9899623588456712, "grad_norm": 1.872180700302124, "learning_rate": 4.3284387380743145e-10, "loss": 1.3608, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3172 }, { "epoch": 1.9905897114178168, "grad_norm": 1.7151243686676025, "learning_rate": 3.834200582036873e-10, "loss": 1.1362, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3173 }, { "epoch": 1.9912170639899625, "grad_norm": 1.7240897417068481, "learning_rate": 3.3699132865605554e-10, "loss": 1.317, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3174 }, { "epoch": 1.9918444165621079, "grad_norm": 1.8286082744598389, "learning_rate": 2.9355774079614653e-10, "loss": 1.2865, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3175 }, { "epoch": 1.9924717691342535, "grad_norm": 2.1468513011932373, "learning_rate": 2.5311934666705227e-10, "loss": 1.136, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3176 }, { "epoch": 1.9930991217063991, "grad_norm": 1.8591892719268799, "learning_rate": 2.1567619472279144e-10, "loss": 1.3733, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3177 }, { "epoch": 1.9937264742785445, "grad_norm": 1.981756567955017, "learning_rate": 1.812283298280315e-10, "loss": 1.4058, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3178 }, { "epoch": 1.99435382685069, "grad_norm": 1.5068249702453613, "learning_rate": 1.497757932591992e-10, "loss": 1.1924, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3179 }, { "epoch": 1.9949811794228356, "grad_norm": 3.0467026233673096, "learning_rate": 1.2131862270337025e-10, "loss": 1.2362, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3180 }, { "epoch": 1.9956085319949812, "grad_norm": 1.921818494796753, "learning_rate": 9.585685225826924e-11, "loss": 1.3118, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3181 }, { "epoch": 1.9962358845671266, "grad_norm": 1.6637516021728516, "learning_rate": 7.339051243254735e-11, "loss": 1.2209, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3182 }, { "epoch": 1.9968632371392723, "grad_norm": 1.7344951629638672, "learning_rate": 5.391963014605983e-11, "loss": 1.3776, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3183 }, { "epoch": 1.997490589711418, "grad_norm": 1.964633822441101, "learning_rate": 3.744422872875575e-11, "loss": 1.2294, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3184 }, { "epoch": 1.9981179422835633, "grad_norm": 1.756880521774292, "learning_rate": 2.3964327922343377e-11, "loss": 1.2063, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3185 }, { "epoch": 1.998745294855709, "grad_norm": 1.5146030187606812, "learning_rate": 1.3479943878347279e-11, "loss": 1.1933, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3186 }, { "epoch": 1.9993726474278546, "grad_norm": 1.5605469942092896, "learning_rate": 5.9910891592185325e-12, "loss": 1.2432, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3187 }, { "epoch": 2.0, "grad_norm": 1.724379062652588, "learning_rate": 1.497772738334735e-12, "loss": 1.1339, "memory/device_mem_reserved(gib)": 67.56, "memory/max_mem_active(gib)": 62.26, "memory/max_mem_allocated(gib)": 61.07, "step": 3188 } ], "logging_steps": 1, "max_steps": 3188, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 797, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2702356450110027e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }