| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 315, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009523809523809525, |
| "grad_norm": 2.447430372238159, |
| "learning_rate": 0.0, |
| "loss": 1.8171, |
| "memory/device_mem_reserved(gib)": 36.42, |
| "memory/max_mem_active(gib)": 35.17, |
| "memory/max_mem_allocated(gib)": 35.17, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01904761904761905, |
| "grad_norm": 2.280487537384033, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 1.7544, |
| "memory/device_mem_reserved(gib)": 36.45, |
| "memory/max_mem_active(gib)": 35.23, |
| "memory/max_mem_allocated(gib)": 35.23, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.02857142857142857, |
| "grad_norm": 2.248966693878174, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 1.7546, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0380952380952381, |
| "grad_norm": 2.2559540271759033, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 1.8011, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.047619047619047616, |
| "grad_norm": 2.149872064590454, |
| "learning_rate": 8.888888888888889e-05, |
| "loss": 1.6465, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05714285714285714, |
| "grad_norm": 1.6111228466033936, |
| "learning_rate": 0.00011111111111111112, |
| "loss": 1.5935, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 1.363952875137329, |
| "learning_rate": 0.00013333333333333334, |
| "loss": 1.4371, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0761904761904762, |
| "grad_norm": 1.1439425945281982, |
| "learning_rate": 0.00015555555555555556, |
| "loss": 1.2409, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.08571428571428572, |
| "grad_norm": 0.877252995967865, |
| "learning_rate": 0.00017777777777777779, |
| "loss": 1.1083, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.09523809523809523, |
| "grad_norm": 0.8071479797363281, |
| "learning_rate": 0.0002, |
| "loss": 0.9937, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10476190476190476, |
| "grad_norm": 0.7772606015205383, |
| "learning_rate": 0.00019999472984871732, |
| "loss": 0.9235, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 0.811764657497406, |
| "learning_rate": 0.00019997891995035912, |
| "loss": 0.7635, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.12380952380952381, |
| "grad_norm": 0.8423659801483154, |
| "learning_rate": 0.0001999525719713366, |
| "loss": 0.6836, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 0.7858280539512634, |
| "learning_rate": 0.0001999156886888064, |
| "loss": 0.6159, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.739560604095459, |
| "learning_rate": 0.00019986827399037812, |
| "loss": 0.5082, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1523809523809524, |
| "grad_norm": 0.6787500381469727, |
| "learning_rate": 0.00019981033287370443, |
| "loss": 0.4553, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.1619047619047619, |
| "grad_norm": 0.6258607506752014, |
| "learning_rate": 0.00019974187144595432, |
| "loss": 0.3913, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 0.47939175367355347, |
| "learning_rate": 0.00019966289692316944, |
| "loss": 0.3048, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.18095238095238095, |
| "grad_norm": 0.49321815371513367, |
| "learning_rate": 0.00019957341762950344, |
| "loss": 0.3417, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 0.4252234101295471, |
| "learning_rate": 0.00019947344299634464, |
| "loss": 0.3033, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.46225014328956604, |
| "learning_rate": 0.00019936298356132176, |
| "loss": 0.2984, |
| "memory/device_mem_reserved(gib)": 47.1, |
| "memory/max_mem_active(gib)": 45.45, |
| "memory/max_mem_allocated(gib)": 45.45, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.20952380952380953, |
| "grad_norm": 0.4106466472148895, |
| "learning_rate": 0.0001992420509671936, |
| "loss": 0.2764, |
| "memory/device_mem_reserved(gib)": 47.12, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.21904761904761905, |
| "grad_norm": 0.3764067590236664, |
| "learning_rate": 0.00019911065796062135, |
| "loss": 0.2352, |
| "memory/device_mem_reserved(gib)": 47.12, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 0.38518962264060974, |
| "learning_rate": 0.00019896881839082556, |
| "loss": 0.2239, |
| "memory/device_mem_reserved(gib)": 47.12, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.23809523809523808, |
| "grad_norm": 0.39728403091430664, |
| "learning_rate": 0.00019881654720812594, |
| "loss": 0.2192, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.24761904761904763, |
| "grad_norm": 0.3667093813419342, |
| "learning_rate": 0.00019865386046236596, |
| "loss": 0.2834, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.2571428571428571, |
| "grad_norm": 0.32502660155296326, |
| "learning_rate": 0.00019848077530122083, |
| "loss": 0.2166, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 0.31225672364234924, |
| "learning_rate": 0.0001982973099683902, |
| "loss": 0.2238, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.2761904761904762, |
| "grad_norm": 0.27448731660842896, |
| "learning_rate": 0.00019810348380167527, |
| "loss": 0.1983, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.32014158368110657, |
| "learning_rate": 0.00019789931723094046, |
| "loss": 0.2554, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.29523809523809524, |
| "grad_norm": 0.39480143785476685, |
| "learning_rate": 0.0001976848317759601, |
| "loss": 0.2438, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.3047619047619048, |
| "grad_norm": 0.28004321455955505, |
| "learning_rate": 0.00019746005004415005, |
| "loss": 0.2145, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.3142857142857143, |
| "grad_norm": 0.2754361033439636, |
| "learning_rate": 0.00019722499572818496, |
| "loss": 0.2284, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.46, |
| "memory/max_mem_allocated(gib)": 45.46, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.3238095238095238, |
| "grad_norm": 0.3160915970802307, |
| "learning_rate": 0.00019697969360350098, |
| "loss": 0.2482, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.24699093401432037, |
| "learning_rate": 0.00019672416952568416, |
| "loss": 0.1916, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 0.2674485743045807, |
| "learning_rate": 0.00019645845042774553, |
| "loss": 0.2204, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.3523809523809524, |
| "grad_norm": 0.33608749508857727, |
| "learning_rate": 0.00019618256431728194, |
| "loss": 0.235, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.3619047619047619, |
| "grad_norm": 0.29581594467163086, |
| "learning_rate": 0.00019589654027352414, |
| "loss": 0.2175, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.37142857142857144, |
| "grad_norm": 0.3526640832424164, |
| "learning_rate": 0.0001956004084442718, |
| "loss": 0.1989, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 0.2882135808467865, |
| "learning_rate": 0.00019529420004271567, |
| "loss": 0.2144, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3904761904761905, |
| "grad_norm": 0.2675183117389679, |
| "learning_rate": 0.0001949779473441478, |
| "loss": 0.1987, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.2908264696598053, |
| "learning_rate": 0.00019465168368255946, |
| "loss": 0.1893, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.4095238095238095, |
| "grad_norm": 0.3211234211921692, |
| "learning_rate": 0.00019431544344712776, |
| "loss": 0.2008, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.41904761904761906, |
| "grad_norm": 0.2795293927192688, |
| "learning_rate": 0.00019396926207859084, |
| "loss": 0.1935, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.28679871559143066, |
| "learning_rate": 0.00019361317606551238, |
| "loss": 0.1987, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.4380952380952381, |
| "grad_norm": 0.3165067732334137, |
| "learning_rate": 0.00019324722294043558, |
| "loss": 0.2118, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.44761904761904764, |
| "grad_norm": 0.25720369815826416, |
| "learning_rate": 0.00019287144127592704, |
| "loss": 0.1962, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.2577202022075653, |
| "learning_rate": 0.0001924858706805112, |
| "loss": 0.2022, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 0.2605520188808441, |
| "learning_rate": 0.0001920905517944954, |
| "loss": 0.1891, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.2890350818634033, |
| "learning_rate": 0.00019168552628568631, |
| "loss": 0.17, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4857142857142857, |
| "grad_norm": 0.25532475113868713, |
| "learning_rate": 0.00019127083684499806, |
| "loss": 0.1796, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.49523809523809526, |
| "grad_norm": 0.2963904142379761, |
| "learning_rate": 0.00019084652718195238, |
| "loss": 0.19, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.5047619047619047, |
| "grad_norm": 0.24455095827579498, |
| "learning_rate": 0.0001904126420200716, |
| "loss": 0.2017, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 0.2518145442008972, |
| "learning_rate": 0.00018996922709216455, |
| "loss": 0.1896, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.5238095238095238, |
| "grad_norm": 0.26225805282592773, |
| "learning_rate": 0.00018951632913550626, |
| "loss": 0.1893, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.25169169902801514, |
| "learning_rate": 0.00018905399588691163, |
| "loss": 0.1939, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.5428571428571428, |
| "grad_norm": 0.23847182095050812, |
| "learning_rate": 0.00018858227607770398, |
| "loss": 0.1686, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.5523809523809524, |
| "grad_norm": 0.30888280272483826, |
| "learning_rate": 0.00018810121942857845, |
| "loss": 0.1768, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.5619047619047619, |
| "grad_norm": 0.27275580167770386, |
| "learning_rate": 0.00018761087664436138, |
| "loss": 0.1941, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.22851359844207764, |
| "learning_rate": 0.00018711129940866575, |
| "loss": 0.1658, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.580952380952381, |
| "grad_norm": 0.25515425205230713, |
| "learning_rate": 0.00018660254037844388, |
| "loss": 0.1858, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.5904761904761905, |
| "grad_norm": 0.24301765859127045, |
| "learning_rate": 0.00018608465317843678, |
| "loss": 0.1892, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.24329794943332672, |
| "learning_rate": 0.00018555769239552233, |
| "loss": 0.1719, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.6095238095238096, |
| "grad_norm": 0.3206610381603241, |
| "learning_rate": 0.00018502171357296144, |
| "loss": 0.2075, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.6190476190476191, |
| "grad_norm": 0.3089618980884552, |
| "learning_rate": 0.00018447677320454367, |
| "loss": 0.185, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.6285714285714286, |
| "grad_norm": 0.291916161775589, |
| "learning_rate": 0.00018392292872863267, |
| "loss": 0.1739, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.638095238095238, |
| "grad_norm": 0.25783228874206543, |
| "learning_rate": 0.00018336023852211195, |
| "loss": 0.2061, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.6476190476190476, |
| "grad_norm": 0.310614675283432, |
| "learning_rate": 0.00018278876189423179, |
| "loss": 0.1984, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.6571428571428571, |
| "grad_norm": 0.23836325109004974, |
| "learning_rate": 0.00018220855908035783, |
| "loss": 0.1742, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.30577945709228516, |
| "learning_rate": 0.0001816196912356222, |
| "loss": 0.1954, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6761904761904762, |
| "grad_norm": 0.3177741467952728, |
| "learning_rate": 0.00018102222042847737, |
| "loss": 0.193, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.3444816470146179, |
| "learning_rate": 0.00018041620963415417, |
| "loss": 0.1837, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.6952380952380952, |
| "grad_norm": 0.3139638900756836, |
| "learning_rate": 0.000179801722728024, |
| "loss": 0.1902, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.7047619047619048, |
| "grad_norm": 0.24754184484481812, |
| "learning_rate": 0.00017917882447886582, |
| "loss": 0.1612, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.23204036056995392, |
| "learning_rate": 0.00017854758054203988, |
| "loss": 0.1694, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.7238095238095238, |
| "grad_norm": 0.2823260426521301, |
| "learning_rate": 0.00017790805745256704, |
| "loss": 0.1739, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 0.3193572163581848, |
| "learning_rate": 0.0001772603226181159, |
| "loss": 0.1924, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 0.30530717968940735, |
| "learning_rate": 0.0001766044443118978, |
| "loss": 0.1375, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.7523809523809524, |
| "grad_norm": 0.2974378764629364, |
| "learning_rate": 0.00017594049166547073, |
| "loss": 0.1866, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 0.26553574204444885, |
| "learning_rate": 0.00017526853466145244, |
| "loss": 0.193, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7714285714285715, |
| "grad_norm": 0.32059594988822937, |
| "learning_rate": 0.00017458864412614434, |
| "loss": 0.1822, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.780952380952381, |
| "grad_norm": 0.3260822594165802, |
| "learning_rate": 0.00017390089172206592, |
| "loss": 0.1734, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.7904761904761904, |
| "grad_norm": 0.2754499912261963, |
| "learning_rate": 0.00017320534994040148, |
| "loss": 0.1814, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.2639872431755066, |
| "learning_rate": 0.00017250209209335927, |
| "loss": 0.1764, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.8095238095238095, |
| "grad_norm": 0.22905386984348297, |
| "learning_rate": 0.0001717911923064442, |
| "loss": 0.1365, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.819047619047619, |
| "grad_norm": 0.26005449891090393, |
| "learning_rate": 0.00017107272551064473, |
| "loss": 0.1756, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.8285714285714286, |
| "grad_norm": 0.25042417645454407, |
| "learning_rate": 0.00017034676743453499, |
| "loss": 0.15, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.8380952380952381, |
| "grad_norm": 0.34358277916908264, |
| "learning_rate": 0.0001696133945962927, |
| "loss": 0.1763, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.8476190476190476, |
| "grad_norm": 0.23855257034301758, |
| "learning_rate": 0.0001688726842956339, |
| "loss": 0.1547, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.2579350173473358, |
| "learning_rate": 0.0001681247146056654, |
| "loss": 0.1872, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 0.30034390091896057, |
| "learning_rate": 0.00016736956436465573, |
| "loss": 0.1558, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.8761904761904762, |
| "grad_norm": 0.28185054659843445, |
| "learning_rate": 0.00016660731316772505, |
| "loss": 0.1878, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.8857142857142857, |
| "grad_norm": 0.32514333724975586, |
| "learning_rate": 0.0001658380413584558, |
| "loss": 0.1705, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.8952380952380953, |
| "grad_norm": 0.30903860926628113, |
| "learning_rate": 0.0001650618300204242, |
| "loss": 0.1708, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.9047619047619048, |
| "grad_norm": 0.24723806977272034, |
| "learning_rate": 0.00016427876096865394, |
| "loss": 0.1571, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 0.2727813422679901, |
| "learning_rate": 0.0001634889167409923, |
| "loss": 0.1779, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.9238095238095239, |
| "grad_norm": 0.2521083354949951, |
| "learning_rate": 0.0001626923805894107, |
| "loss": 0.1451, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 0.25517475605010986, |
| "learning_rate": 0.00016188923647122947, |
| "loss": 0.1789, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.9428571428571428, |
| "grad_norm": 0.26679396629333496, |
| "learning_rate": 0.0001610795690402688, |
| "loss": 0.1835, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.22741852700710297, |
| "learning_rate": 0.00016026346363792567, |
| "loss": 0.1599, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9619047619047619, |
| "grad_norm": 0.28691986203193665, |
| "learning_rate": 0.00015944100628417868, |
| "loss": 0.1654, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.9714285714285714, |
| "grad_norm": 0.24840302765369415, |
| "learning_rate": 0.00015861228366852148, |
| "loss": 0.1584, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.9809523809523809, |
| "grad_norm": 0.24804599583148956, |
| "learning_rate": 0.00015777738314082514, |
| "loss": 0.1391, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.9904761904761905, |
| "grad_norm": 0.2463735193014145, |
| "learning_rate": 0.00015693639270213136, |
| "loss": 0.1408, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.29896217584609985, |
| "learning_rate": 0.000156089400995377, |
| "loss": 0.1666, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.0095238095238095, |
| "grad_norm": 0.2636161744594574, |
| "learning_rate": 0.0001552364972960506, |
| "loss": 0.1714, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.019047619047619, |
| "grad_norm": 0.2503117322921753, |
| "learning_rate": 0.00015437777150278267, |
| "loss": 0.1522, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.0285714285714285, |
| "grad_norm": 0.3032307028770447, |
| "learning_rate": 0.00015351331412787004, |
| "loss": 0.163, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.0380952380952382, |
| "grad_norm": 0.2652963399887085, |
| "learning_rate": 0.0001526432162877356, |
| "loss": 0.1589, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.0476190476190477, |
| "grad_norm": 0.2623184621334076, |
| "learning_rate": 0.00015176756969332425, |
| "loss": 0.1429, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.0571428571428572, |
| "grad_norm": 0.28222620487213135, |
| "learning_rate": 0.0001508864666404365, |
| "loss": 0.1469, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 0.28255829215049744, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 0.156, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.0761904761904761, |
| "grad_norm": 0.22465772926807404, |
| "learning_rate": 0.00014910826320828084, |
| "loss": 0.131, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.0857142857142856, |
| "grad_norm": 0.30199363827705383, |
| "learning_rate": 0.0001482113502570349, |
| "loss": 0.1599, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.0952380952380953, |
| "grad_norm": 0.255288302898407, |
| "learning_rate": 0.00014730935568360102, |
| "loss": 0.1636, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.1047619047619048, |
| "grad_norm": 0.26142561435699463, |
| "learning_rate": 0.00014640237456093634, |
| "loss": 0.1518, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.1142857142857143, |
| "grad_norm": 0.30472439527511597, |
| "learning_rate": 0.00014549050248759547, |
| "loss": 0.1594, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.1238095238095238, |
| "grad_norm": 0.26955774426460266, |
| "learning_rate": 0.00014457383557765386, |
| "loss": 0.1676, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.1333333333333333, |
| "grad_norm": 0.34260597825050354, |
| "learning_rate": 0.00014365247045057734, |
| "loss": 0.1583, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.3136855661869049, |
| "learning_rate": 0.0001427265042210381, |
| "loss": 0.1558, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.1523809523809523, |
| "grad_norm": 0.27992814779281616, |
| "learning_rate": 0.00014179603448867835, |
| "loss": 0.1509, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.161904761904762, |
| "grad_norm": 0.34893789887428284, |
| "learning_rate": 0.00014086115932782314, |
| "loss": 0.1395, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.1714285714285715, |
| "grad_norm": 0.3308560252189636, |
| "learning_rate": 0.0001399219772771431, |
| "loss": 0.1591, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.180952380952381, |
| "grad_norm": 0.2736824154853821, |
| "learning_rate": 0.00013897858732926793, |
| "loss": 0.1604, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.1904761904761905, |
| "grad_norm": 0.28148430585861206, |
| "learning_rate": 0.0001380310889203526, |
| "loss": 0.1368, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.25899434089660645, |
| "learning_rate": 0.00013707958191959608, |
| "loss": 0.15, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.2095238095238094, |
| "grad_norm": 0.32127872109413147, |
| "learning_rate": 0.00013612416661871533, |
| "loss": 0.1522, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.2190476190476192, |
| "grad_norm": 0.350427508354187, |
| "learning_rate": 0.00013516494372137368, |
| "loss": 0.1361, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.2285714285714286, |
| "grad_norm": 0.36603009700775146, |
| "learning_rate": 0.00013420201433256689, |
| "loss": 0.1527, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.2380952380952381, |
| "grad_norm": 0.2870017886161804, |
| "learning_rate": 0.00013323547994796597, |
| "loss": 0.152, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.2476190476190476, |
| "grad_norm": 0.29138877987861633, |
| "learning_rate": 0.0001322654424432195, |
| "loss": 0.1599, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.2571428571428571, |
| "grad_norm": 0.3358840048313141, |
| "learning_rate": 0.00013129200406321545, |
| "loss": 0.1668, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.2666666666666666, |
| "grad_norm": 0.3152446746826172, |
| "learning_rate": 0.00013031526741130435, |
| "loss": 0.1382, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.276190476190476, |
| "grad_norm": 0.2602708041667938, |
| "learning_rate": 0.00012933533543848461, |
| "loss": 0.1441, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.27083972096443176, |
| "learning_rate": 0.0001283523114325511, |
| "loss": 0.1517, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.2952380952380953, |
| "grad_norm": 0.27242034673690796, |
| "learning_rate": 0.0001273662990072083, |
| "loss": 0.1511, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.3047619047619048, |
| "grad_norm": 0.28473100066185, |
| "learning_rate": 0.0001263774020911492, |
| "loss": 0.1339, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.3142857142857143, |
| "grad_norm": 0.27939581871032715, |
| "learning_rate": 0.0001253857249171008, |
| "loss": 0.1414, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.3238095238095238, |
| "grad_norm": 0.2833685874938965, |
| "learning_rate": 0.00012439137201083773, |
| "loss": 0.139, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.3193693161010742, |
| "learning_rate": 0.0001233944481801649, |
| "loss": 0.159, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.342857142857143, |
| "grad_norm": 0.28556281328201294, |
| "learning_rate": 0.0001223950585038703, |
| "loss": 0.1549, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.3523809523809525, |
| "grad_norm": 0.2670241594314575, |
| "learning_rate": 0.00012139330832064974, |
| "loss": 0.1595, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.361904761904762, |
| "grad_norm": 0.20607773959636688, |
| "learning_rate": 0.00012038930321800346, |
| "loss": 0.1299, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.3714285714285714, |
| "grad_norm": 0.2973734736442566, |
| "learning_rate": 0.00011938314902110701, |
| "loss": 0.1552, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.380952380952381, |
| "grad_norm": 0.3032817244529724, |
| "learning_rate": 0.00011837495178165706, |
| "loss": 0.1554, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.3904761904761904, |
| "grad_norm": 0.2765556871891022, |
| "learning_rate": 0.00011736481776669306, |
| "loss": 0.1346, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.24869301915168762, |
| "learning_rate": 0.00011635285344739651, |
| "loss": 0.1573, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.4095238095238094, |
| "grad_norm": 0.28329774737358093, |
| "learning_rate": 0.00011533916548786857, |
| "loss": 0.166, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.4190476190476191, |
| "grad_norm": 0.31719744205474854, |
| "learning_rate": 0.00011432386073388717, |
| "loss": 0.1556, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.27932024002075195, |
| "learning_rate": 0.00011330704620164538, |
| "loss": 0.1722, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.438095238095238, |
| "grad_norm": 0.2447134554386139, |
| "learning_rate": 0.00011228882906647142, |
| "loss": 0.1318, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.4476190476190476, |
| "grad_norm": 0.2684175670146942, |
| "learning_rate": 0.00011126931665153212, |
| "loss": 0.1465, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.457142857142857, |
| "grad_norm": 0.24706321954727173, |
| "learning_rate": 0.00011024861641652071, |
| "loss": 0.1226, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.4666666666666668, |
| "grad_norm": 0.2958153486251831, |
| "learning_rate": 0.00010922683594633021, |
| "loss": 0.1447, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.4761904761904763, |
| "grad_norm": 0.305876761674881, |
| "learning_rate": 0.00010820408293971378, |
| "loss": 0.1566, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.4857142857142858, |
| "grad_norm": 0.2755098342895508, |
| "learning_rate": 0.00010718046519793276, |
| "loss": 0.155, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.4952380952380953, |
| "grad_norm": 0.2550102174282074, |
| "learning_rate": 0.00010615609061339432, |
| "loss": 0.1227, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.5047619047619047, |
| "grad_norm": 0.26932892203330994, |
| "learning_rate": 0.00010513106715827896, |
| "loss": 0.1533, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.5142857142857142, |
| "grad_norm": 0.24724788963794708, |
| "learning_rate": 0.00010410550287316034, |
| "loss": 0.1346, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.5238095238095237, |
| "grad_norm": 0.2605031132698059, |
| "learning_rate": 0.00010307950585561706, |
| "loss": 0.1319, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 0.28353723883628845, |
| "learning_rate": 0.00010205318424883905, |
| "loss": 0.1386, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.5428571428571427, |
| "grad_norm": 0.31240761280059814, |
| "learning_rate": 0.00010102664623022899, |
| "loss": 0.1468, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.5523809523809524, |
| "grad_norm": 0.2614494562149048, |
| "learning_rate": 0.0001, |
| "loss": 0.146, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.47, |
| "memory/max_mem_allocated(gib)": 45.47, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.561904761904762, |
| "grad_norm": 0.28343483805656433, |
| "learning_rate": 9.897335376977102e-05, |
| "loss": 0.1531, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.2741709053516388, |
| "learning_rate": 9.794681575116097e-05, |
| "loss": 0.1304, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.580952380952381, |
| "grad_norm": 0.326215922832489, |
| "learning_rate": 9.692049414438299e-05, |
| "loss": 0.1551, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.5904761904761906, |
| "grad_norm": 0.3120001256465912, |
| "learning_rate": 9.58944971268397e-05, |
| "loss": 0.1387, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.2673357129096985, |
| "learning_rate": 9.486893284172102e-05, |
| "loss": 0.1389, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.6095238095238096, |
| "grad_norm": 0.3184024691581726, |
| "learning_rate": 9.384390938660572e-05, |
| "loss": 0.1388, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.619047619047619, |
| "grad_norm": 0.28744614124298096, |
| "learning_rate": 9.281953480206725e-05, |
| "loss": 0.1246, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.6285714285714286, |
| "grad_norm": 0.28229352831840515, |
| "learning_rate": 9.179591706028626e-05, |
| "loss": 0.1637, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.638095238095238, |
| "grad_norm": 0.2750794589519501, |
| "learning_rate": 9.077316405366981e-05, |
| "loss": 0.1176, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.6476190476190475, |
| "grad_norm": 0.28753307461738586, |
| "learning_rate": 8.975138358347931e-05, |
| "loss": 0.1517, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.657142857142857, |
| "grad_norm": 0.2737794518470764, |
| "learning_rate": 8.87306833484679e-05, |
| "loss": 0.1604, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.2456161379814148, |
| "learning_rate": 8.77111709335286e-05, |
| "loss": 0.1441, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.6761904761904762, |
| "grad_norm": 0.2895718812942505, |
| "learning_rate": 8.669295379835467e-05, |
| "loss": 0.152, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.6857142857142857, |
| "grad_norm": 0.3128141462802887, |
| "learning_rate": 8.567613926611288e-05, |
| "loss": 0.1692, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.6952380952380952, |
| "grad_norm": 0.29811859130859375, |
| "learning_rate": 8.466083451213144e-05, |
| "loss": 0.1351, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.704761904761905, |
| "grad_norm": 0.2686918377876282, |
| "learning_rate": 8.364714655260349e-05, |
| "loss": 0.1318, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.31242185831069946, |
| "learning_rate": 8.263518223330697e-05, |
| "loss": 0.1512, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.723809523809524, |
| "grad_norm": 0.3040373623371124, |
| "learning_rate": 8.162504821834295e-05, |
| "loss": 0.1464, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.7333333333333334, |
| "grad_norm": 0.2847082018852234, |
| "learning_rate": 8.0616850978893e-05, |
| "loss": 0.1586, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.7428571428571429, |
| "grad_norm": 0.27451273798942566, |
| "learning_rate": 7.961069678199658e-05, |
| "loss": 0.1553, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.7523809523809524, |
| "grad_norm": 0.3282047212123871, |
| "learning_rate": 7.860669167935028e-05, |
| "loss": 0.1559, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.7619047619047619, |
| "grad_norm": 0.2895129919052124, |
| "learning_rate": 7.760494149612971e-05, |
| "loss": 0.1431, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.7714285714285714, |
| "grad_norm": 0.2585572898387909, |
| "learning_rate": 7.660555181983518e-05, |
| "loss": 0.1274, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.7809523809523808, |
| "grad_norm": 0.2550938129425049, |
| "learning_rate": 7.560862798916228e-05, |
| "loss": 0.1482, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.7904761904761903, |
| "grad_norm": 0.267459511756897, |
| "learning_rate": 7.461427508289922e-05, |
| "loss": 0.1425, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.273129940032959, |
| "learning_rate": 7.36225979088508e-05, |
| "loss": 0.118, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.8095238095238095, |
| "grad_norm": 0.33791351318359375, |
| "learning_rate": 7.263370099279172e-05, |
| "loss": 0.146, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.819047619047619, |
| "grad_norm": 0.3014010488986969, |
| "learning_rate": 7.164768856744892e-05, |
| "loss": 0.1461, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.8285714285714287, |
| "grad_norm": 0.32012397050857544, |
| "learning_rate": 7.066466456151541e-05, |
| "loss": 0.1782, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.8380952380952382, |
| "grad_norm": 0.25507038831710815, |
| "learning_rate": 6.968473258869566e-05, |
| "loss": 0.1319, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.8476190476190477, |
| "grad_norm": 0.28470760583877563, |
| "learning_rate": 6.870799593678459e-05, |
| "loss": 0.159, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.25228601694107056, |
| "learning_rate": 6.773455755678054e-05, |
| "loss": 0.1284, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.8666666666666667, |
| "grad_norm": 0.28710344433784485, |
| "learning_rate": 6.676452005203406e-05, |
| "loss": 0.1338, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.8761904761904762, |
| "grad_norm": 0.29423221945762634, |
| "learning_rate": 6.579798566743314e-05, |
| "loss": 0.1335, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.8857142857142857, |
| "grad_norm": 0.31776273250579834, |
| "learning_rate": 6.483505627862632e-05, |
| "loss": 0.1509, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.8952380952380952, |
| "grad_norm": 0.2739746868610382, |
| "learning_rate": 6.387583338128471e-05, |
| "loss": 0.1195, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.9047619047619047, |
| "grad_norm": 0.28000062704086304, |
| "learning_rate": 6.292041808040393e-05, |
| "loss": 0.1444, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.9142857142857141, |
| "grad_norm": 0.2637692987918854, |
| "learning_rate": 6.196891107964744e-05, |
| "loss": 0.1412, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.9238095238095239, |
| "grad_norm": 0.2977365255355835, |
| "learning_rate": 6.102141267073207e-05, |
| "loss": 0.1224, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.9333333333333333, |
| "grad_norm": 0.2656545042991638, |
| "learning_rate": 6.007802272285693e-05, |
| "loss": 0.1235, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.9428571428571428, |
| "grad_norm": 0.3028334975242615, |
| "learning_rate": 5.913884067217685e-05, |
| "loss": 0.1544, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.9523809523809523, |
| "grad_norm": 0.32587432861328125, |
| "learning_rate": 5.82039655113217e-05, |
| "loss": 0.1484, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.961904761904762, |
| "grad_norm": 0.2999066710472107, |
| "learning_rate": 5.727349577896194e-05, |
| "loss": 0.1392, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.9714285714285715, |
| "grad_norm": 0.2822760045528412, |
| "learning_rate": 5.634752954942264e-05, |
| "loss": 0.1334, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.980952380952381, |
| "grad_norm": 0.4100743234157562, |
| "learning_rate": 5.542616442234618e-05, |
| "loss": 0.1295, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.9904761904761905, |
| "grad_norm": 0.22527122497558594, |
| "learning_rate": 5.450949751240456e-05, |
| "loss": 0.1377, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 209 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.33393946290016174, |
| "learning_rate": 5.359762543906368e-05, |
| "loss": 0.1251, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.0095238095238095, |
| "grad_norm": 0.2823657989501953, |
| "learning_rate": 5.269064431639901e-05, |
| "loss": 0.1443, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 211 |
| }, |
| { |
| "epoch": 2.019047619047619, |
| "grad_norm": 0.277386873960495, |
| "learning_rate": 5.178864974296511e-05, |
| "loss": 0.1333, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 212 |
| }, |
| { |
| "epoch": 2.0285714285714285, |
| "grad_norm": 0.2936922311782837, |
| "learning_rate": 5.0891736791719213e-05, |
| "loss": 0.1209, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 213 |
| }, |
| { |
| "epoch": 2.038095238095238, |
| "grad_norm": 0.33104684948921204, |
| "learning_rate": 5.000000000000002e-05, |
| "loss": 0.1344, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 214 |
| }, |
| { |
| "epoch": 2.0476190476190474, |
| "grad_norm": 0.2811061143875122, |
| "learning_rate": 4.911353335956352e-05, |
| "loss": 0.1182, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 215 |
| }, |
| { |
| "epoch": 2.057142857142857, |
| "grad_norm": 0.3159407377243042, |
| "learning_rate": 4.823243030667576e-05, |
| "loss": 0.1506, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 216 |
| }, |
| { |
| "epoch": 2.066666666666667, |
| "grad_norm": 0.2573259472846985, |
| "learning_rate": 4.735678371226441e-05, |
| "loss": 0.115, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.0761904761904764, |
| "grad_norm": 0.2847664952278137, |
| "learning_rate": 4.648668587212997e-05, |
| "loss": 0.1596, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.085714285714286, |
| "grad_norm": 0.31137990951538086, |
| "learning_rate": 4.562222849721735e-05, |
| "loss": 0.1389, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.0952380952380953, |
| "grad_norm": 0.29724523425102234, |
| "learning_rate": 4.476350270394942e-05, |
| "loss": 0.1286, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.104761904761905, |
| "grad_norm": 0.3148998022079468, |
| "learning_rate": 4.391059900462304e-05, |
| "loss": 0.1311, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.1142857142857143, |
| "grad_norm": 0.29536697268486023, |
| "learning_rate": 4.306360729786867e-05, |
| "loss": 0.1382, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.123809523809524, |
| "grad_norm": 0.29875388741493225, |
| "learning_rate": 4.222261685917489e-05, |
| "loss": 0.1321, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 223 |
| }, |
| { |
| "epoch": 2.1333333333333333, |
| "grad_norm": 0.27502039074897766, |
| "learning_rate": 4.1387716331478565e-05, |
| "loss": 0.1137, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.48, |
| "memory/max_mem_allocated(gib)": 45.48, |
| "step": 224 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.3302501142024994, |
| "learning_rate": 4.055899371582133e-05, |
| "loss": 0.145, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.1523809523809523, |
| "grad_norm": 0.31309428811073303, |
| "learning_rate": 3.973653636207437e-05, |
| "loss": 0.1336, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 226 |
| }, |
| { |
| "epoch": 2.1619047619047618, |
| "grad_norm": 0.3851131796836853, |
| "learning_rate": 3.8920430959731226e-05, |
| "loss": 0.128, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 227 |
| }, |
| { |
| "epoch": 2.1714285714285713, |
| "grad_norm": 0.30255237221717834, |
| "learning_rate": 3.811076352877054e-05, |
| "loss": 0.1144, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 228 |
| }, |
| { |
| "epoch": 2.1809523809523808, |
| "grad_norm": 0.30191734433174133, |
| "learning_rate": 3.7307619410589376e-05, |
| "loss": 0.1177, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 229 |
| }, |
| { |
| "epoch": 2.1904761904761907, |
| "grad_norm": 0.256660521030426, |
| "learning_rate": 3.651108325900773e-05, |
| "loss": 0.1013, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.26110267639160156, |
| "learning_rate": 3.5721239031346066e-05, |
| "loss": 0.1307, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.2095238095238097, |
| "grad_norm": 0.313667893409729, |
| "learning_rate": 3.493816997957582e-05, |
| "loss": 0.1524, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.219047619047619, |
| "grad_norm": 0.3256130516529083, |
| "learning_rate": 3.416195864154425e-05, |
| "loss": 0.1375, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 233 |
| }, |
| { |
| "epoch": 2.2285714285714286, |
| "grad_norm": 0.5965404510498047, |
| "learning_rate": 3.339268683227499e-05, |
| "loss": 0.1346, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.238095238095238, |
| "grad_norm": 0.25208190083503723, |
| "learning_rate": 3.263043563534428e-05, |
| "loss": 0.1068, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.2476190476190476, |
| "grad_norm": 0.24084089696407318, |
| "learning_rate": 3.187528539433458e-05, |
| "loss": 0.1093, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.257142857142857, |
| "grad_norm": 0.2939463257789612, |
| "learning_rate": 3.112731570436614e-05, |
| "loss": 0.1348, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.2666666666666666, |
| "grad_norm": 0.24885237216949463, |
| "learning_rate": 3.0386605403707346e-05, |
| "loss": 0.1158, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 238 |
| }, |
| { |
| "epoch": 2.276190476190476, |
| "grad_norm": 0.2684744894504547, |
| "learning_rate": 2.9653232565465016e-05, |
| "loss": 0.1094, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 239 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 0.33237671852111816, |
| "learning_rate": 2.8927274489355293e-05, |
| "loss": 0.1227, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.295238095238095, |
| "grad_norm": 0.2611919641494751, |
| "learning_rate": 2.8208807693555818e-05, |
| "loss": 0.1, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 241 |
| }, |
| { |
| "epoch": 2.3047619047619046, |
| "grad_norm": 0.34490758180618286, |
| "learning_rate": 2.7497907906640742e-05, |
| "loss": 0.124, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 242 |
| }, |
| { |
| "epoch": 2.314285714285714, |
| "grad_norm": 0.27448931336402893, |
| "learning_rate": 2.679465005959856e-05, |
| "loss": 0.1277, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.323809523809524, |
| "grad_norm": 0.2973083257675171, |
| "learning_rate": 2.6099108277934103e-05, |
| "loss": 0.1344, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.30860233306884766, |
| "learning_rate": 2.541135587385568e-05, |
| "loss": 0.1308, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.342857142857143, |
| "grad_norm": 0.26857587695121765, |
| "learning_rate": 2.4731465338547556e-05, |
| "loss": 0.1386, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.3523809523809525, |
| "grad_norm": 0.28812259435653687, |
| "learning_rate": 2.405950833452928e-05, |
| "loss": 0.1175, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.361904761904762, |
| "grad_norm": 0.2757267951965332, |
| "learning_rate": 2.339555568810221e-05, |
| "loss": 0.1144, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.3714285714285714, |
| "grad_norm": 0.26114368438720703, |
| "learning_rate": 2.2739677381884115e-05, |
| "loss": 0.1195, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.380952380952381, |
| "grad_norm": 0.2587968707084656, |
| "learning_rate": 2.2091942547432955e-05, |
| "loss": 0.1395, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.3904761904761904, |
| "grad_norm": 0.33188945055007935, |
| "learning_rate": 2.1452419457960137e-05, |
| "loss": 0.1456, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.29628974199295044, |
| "learning_rate": 2.0821175521134207e-05, |
| "loss": 0.1236, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.4095238095238094, |
| "grad_norm": 0.28231939673423767, |
| "learning_rate": 2.0198277271976052e-05, |
| "loss": 0.1265, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.419047619047619, |
| "grad_norm": 0.3047321140766144, |
| "learning_rate": 1.9583790365845822e-05, |
| "loss": 0.0983, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.4285714285714284, |
| "grad_norm": 0.29782846570014954, |
| "learning_rate": 1.8977779571522646e-05, |
| "loss": 0.1355, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.4380952380952383, |
| "grad_norm": 0.29758942127227783, |
| "learning_rate": 1.8380308764377842e-05, |
| "loss": 0.1258, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.447619047619048, |
| "grad_norm": 0.35419225692749023, |
| "learning_rate": 1.7791440919642177e-05, |
| "loss": 0.1163, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.4571428571428573, |
| "grad_norm": 0.2910211682319641, |
| "learning_rate": 1.7211238105768214e-05, |
| "loss": 0.1312, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.466666666666667, |
| "grad_norm": 0.27445051074028015, |
| "learning_rate": 1.663976147788806e-05, |
| "loss": 0.1384, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.4761904761904763, |
| "grad_norm": 0.3003697097301483, |
| "learning_rate": 1.607707127136734e-05, |
| "loss": 0.1288, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.4857142857142858, |
| "grad_norm": 0.2700045108795166, |
| "learning_rate": 1.5523226795456347e-05, |
| "loss": 0.1086, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.4952380952380953, |
| "grad_norm": 0.30866676568984985, |
| "learning_rate": 1.4978286427038601e-05, |
| "loss": 0.144, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.5047619047619047, |
| "grad_norm": 0.29147791862487793, |
| "learning_rate": 1.444230760447769e-05, |
| "loss": 0.1129, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.5142857142857142, |
| "grad_norm": 0.29668715596199036, |
| "learning_rate": 1.3915346821563235e-05, |
| "loss": 0.1346, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.5238095238095237, |
| "grad_norm": 0.2867170572280884, |
| "learning_rate": 1.339745962155613e-05, |
| "loss": 0.1216, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.533333333333333, |
| "grad_norm": 0.3114704489707947, |
| "learning_rate": 1.2888700591334223e-05, |
| "loss": 0.1229, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.5428571428571427, |
| "grad_norm": 0.30811193585395813, |
| "learning_rate": 1.2389123355638654e-05, |
| "loss": 0.1441, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.552380952380952, |
| "grad_norm": 0.3116905391216278, |
| "learning_rate": 1.1898780571421552e-05, |
| "loss": 0.1287, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.5619047619047617, |
| "grad_norm": 0.28540119528770447, |
| "learning_rate": 1.141772392229601e-05, |
| "loss": 0.1263, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.4764971137046814, |
| "learning_rate": 1.0946004113088381e-05, |
| "loss": 0.1104, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.580952380952381, |
| "grad_norm": 0.3028562664985657, |
| "learning_rate": 1.0483670864493778e-05, |
| "loss": 0.1447, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.5904761904761906, |
| "grad_norm": 0.2907711863517761, |
| "learning_rate": 1.0030772907835483e-05, |
| "loss": 0.1231, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.2883255183696747, |
| "learning_rate": 9.587357979928413e-06, |
| "loss": 0.1295, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.6095238095238096, |
| "grad_norm": 0.2747456729412079, |
| "learning_rate": 9.153472818047625e-06, |
| "loss": 0.1436, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.619047619047619, |
| "grad_norm": 0.2808170020580292, |
| "learning_rate": 8.729163155001974e-06, |
| "loss": 0.1517, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.6285714285714286, |
| "grad_norm": 0.25573983788490295, |
| "learning_rate": 8.314473714313719e-06, |
| "loss": 0.1178, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.638095238095238, |
| "grad_norm": 0.3509344756603241, |
| "learning_rate": 7.909448205504632e-06, |
| "loss": 0.1356, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.6476190476190475, |
| "grad_norm": 0.24175764620304108, |
| "learning_rate": 7.514129319488839e-06, |
| "loss": 0.1017, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.49, |
| "memory/max_mem_allocated(gib)": 45.49, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.657142857142857, |
| "grad_norm": 0.29153692722320557, |
| "learning_rate": 7.128558724072976e-06, |
| "loss": 0.1202, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.2581385672092438, |
| "learning_rate": 6.75277705956443e-06, |
| "loss": 0.1148, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.6761904761904765, |
| "grad_norm": 0.2992369532585144, |
| "learning_rate": 6.386823934487618e-06, |
| "loss": 0.1408, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.685714285714286, |
| "grad_norm": 0.26200878620147705, |
| "learning_rate": 6.030737921409169e-06, |
| "loss": 0.1104, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.6952380952380954, |
| "grad_norm": 0.3082549571990967, |
| "learning_rate": 5.684556552872256e-06, |
| "loss": 0.1312, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.704761904761905, |
| "grad_norm": 0.27713072299957275, |
| "learning_rate": 5.348316317440549e-06, |
| "loss": 0.1184, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.7142857142857144, |
| "grad_norm": 0.3070668876171112, |
| "learning_rate": 5.0220526558522274e-06, |
| "loss": 0.1323, |
| "memory/device_mem_reserved(gib)": 47.13, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.723809523809524, |
| "grad_norm": 0.2855824828147888, |
| "learning_rate": 4.705799957284351e-06, |
| "loss": 0.1352, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.7333333333333334, |
| "grad_norm": 0.2835365831851959, |
| "learning_rate": 4.399591555728233e-06, |
| "loss": 0.1111, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.742857142857143, |
| "grad_norm": 0.23606140911579132, |
| "learning_rate": 4.103459726475889e-06, |
| "loss": 0.1099, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.7523809523809524, |
| "grad_norm": 0.32546883821487427, |
| "learning_rate": 3.817435682718096e-06, |
| "loss": 0.1271, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.761904761904762, |
| "grad_norm": 0.29360413551330566, |
| "learning_rate": 3.541549572254488e-06, |
| "loss": 0.1328, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.7714285714285714, |
| "grad_norm": 0.2728084623813629, |
| "learning_rate": 3.275830474315855e-06, |
| "loss": 0.1279, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.780952380952381, |
| "grad_norm": 0.2540973424911499, |
| "learning_rate": 3.0203063964990617e-06, |
| "loss": 0.1172, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.7904761904761903, |
| "grad_norm": 0.2903876304626465, |
| "learning_rate": 2.7750042718150516e-06, |
| "loss": 0.1254, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.29096516966819763, |
| "learning_rate": 2.539949955849985e-06, |
| "loss": 0.1338, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.8095238095238093, |
| "grad_norm": 0.27256572246551514, |
| "learning_rate": 2.315168224039932e-06, |
| "loss": 0.1432, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.819047619047619, |
| "grad_norm": 0.3155645728111267, |
| "learning_rate": 2.100682769059548e-06, |
| "loss": 0.1174, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.8285714285714287, |
| "grad_norm": 0.3359803259372711, |
| "learning_rate": 1.8965161983247493e-06, |
| "loss": 0.1325, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.8380952380952382, |
| "grad_norm": 0.30490148067474365, |
| "learning_rate": 1.7026900316098215e-06, |
| "loss": 0.1162, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.8476190476190477, |
| "grad_norm": 0.29144126176834106, |
| "learning_rate": 1.5192246987791981e-06, |
| "loss": 0.1226, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.2892225682735443, |
| "learning_rate": 1.3461395376340502e-06, |
| "loss": 0.1336, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.8666666666666667, |
| "grad_norm": 0.27009886503219604, |
| "learning_rate": 1.1834527918740623e-06, |
| "loss": 0.1092, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.876190476190476, |
| "grad_norm": 0.2559907138347626, |
| "learning_rate": 1.0311816091744698e-06, |
| "loss": 0.1192, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.8857142857142857, |
| "grad_norm": 0.3120160400867462, |
| "learning_rate": 8.893420393786489e-07, |
| "loss": 0.1324, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.895238095238095, |
| "grad_norm": 0.3137604892253876, |
| "learning_rate": 7.579490328064265e-07, |
| "loss": 0.1284, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.9047619047619047, |
| "grad_norm": 0.27408653497695923, |
| "learning_rate": 6.370164386782285e-07, |
| "loss": 0.122, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.914285714285714, |
| "grad_norm": 0.3108055293560028, |
| "learning_rate": 5.265570036553813e-07, |
| "loss": 0.1196, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.923809523809524, |
| "grad_norm": 0.2910846471786499, |
| "learning_rate": 4.2658237049655323e-07, |
| "loss": 0.1293, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.9333333333333336, |
| "grad_norm": 0.2849004566669464, |
| "learning_rate": 3.371030768305583e-07, |
| "loss": 0.1333, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.942857142857143, |
| "grad_norm": 0.30985507369041443, |
| "learning_rate": 2.5812855404568905e-07, |
| "loss": 0.1446, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.9523809523809526, |
| "grad_norm": 0.30461379885673523, |
| "learning_rate": 1.8966712629558957e-07, |
| "loss": 0.1203, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.961904761904762, |
| "grad_norm": 0.29812026023864746, |
| "learning_rate": 1.3172600962190197e-07, |
| "loss": 0.1275, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.9714285714285715, |
| "grad_norm": 0.29495131969451904, |
| "learning_rate": 8.43113111936189e-08, |
| "loss": 0.1212, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.980952380952381, |
| "grad_norm": 0.27057307958602905, |
| "learning_rate": 4.74280286634099e-08, |
| "loss": 0.1146, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.9904761904761905, |
| "grad_norm": 0.3303758203983307, |
| "learning_rate": 2.108004964086474e-08, |
| "loss": 0.1273, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 314 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.26469886302948, |
| "learning_rate": 5.270151282688041e-09, |
| "loss": 0.1281, |
| "memory/device_mem_reserved(gib)": 47.14, |
| "memory/max_mem_active(gib)": 45.5, |
| "memory/max_mem_allocated(gib)": 45.5, |
| "step": 315 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 315, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.864148423206502e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|