| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 436, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0022935779816513763, | |
| "grad_norm": 0.12869106233119965, | |
| "learning_rate": 0.0, | |
| "loss": 0.1978, | |
| "memory/device_reserved (GiB)": 50.77, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 1, | |
| "tokens_per_second_per_gpu": 354.96 | |
| }, | |
| { | |
| "epoch": 0.0045871559633027525, | |
| "grad_norm": 0.15667210519313812, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 0.2353, | |
| "memory/device_reserved (GiB)": 50.77, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 2, | |
| "tokens_per_second_per_gpu": 406.37 | |
| }, | |
| { | |
| "epoch": 0.006880733944954129, | |
| "grad_norm": 0.2217973917722702, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 0.2243, | |
| "memory/device_reserved (GiB)": 50.87, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 3, | |
| "tokens_per_second_per_gpu": 371.18 | |
| }, | |
| { | |
| "epoch": 0.009174311926605505, | |
| "grad_norm": 0.15948686003684998, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 0.2392, | |
| "memory/device_reserved (GiB)": 50.87, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 4, | |
| "tokens_per_second_per_gpu": 414.48 | |
| }, | |
| { | |
| "epoch": 0.011467889908256881, | |
| "grad_norm": 0.153566375374794, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 0.2182, | |
| "memory/device_reserved (GiB)": 50.87, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 5, | |
| "tokens_per_second_per_gpu": 369.22 | |
| }, | |
| { | |
| "epoch": 0.013761467889908258, | |
| "grad_norm": 0.1521972268819809, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 0.2112, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 6, | |
| "tokens_per_second_per_gpu": 429.31 | |
| }, | |
| { | |
| "epoch": 0.016055045871559634, | |
| "grad_norm": 0.168710395693779, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 0.226, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 7, | |
| "tokens_per_second_per_gpu": 417.78 | |
| }, | |
| { | |
| "epoch": 0.01834862385321101, | |
| "grad_norm": 0.13864850997924805, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.1884, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 8, | |
| "tokens_per_second_per_gpu": 439.56 | |
| }, | |
| { | |
| "epoch": 0.020642201834862386, | |
| "grad_norm": 0.15227903425693512, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 0.1996, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 9, | |
| "tokens_per_second_per_gpu": 411.33 | |
| }, | |
| { | |
| "epoch": 0.022935779816513763, | |
| "grad_norm": 0.13421630859375, | |
| "learning_rate": 4.2857142857142856e-05, | |
| "loss": 0.1599, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 10, | |
| "tokens_per_second_per_gpu": 496.3 | |
| }, | |
| { | |
| "epoch": 0.02522935779816514, | |
| "grad_norm": 0.14955134689807892, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 0.1735, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 11, | |
| "tokens_per_second_per_gpu": 372.95 | |
| }, | |
| { | |
| "epoch": 0.027522935779816515, | |
| "grad_norm": 0.1432778388261795, | |
| "learning_rate": 5.2380952380952384e-05, | |
| "loss": 0.1515, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 12, | |
| "tokens_per_second_per_gpu": 398.65 | |
| }, | |
| { | |
| "epoch": 0.02981651376146789, | |
| "grad_norm": 0.14163611829280853, | |
| "learning_rate": 5.714285714285714e-05, | |
| "loss": 0.1517, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 13, | |
| "tokens_per_second_per_gpu": 440.5 | |
| }, | |
| { | |
| "epoch": 0.03211009174311927, | |
| "grad_norm": 0.15477906167507172, | |
| "learning_rate": 6.19047619047619e-05, | |
| "loss": 0.1444, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 14, | |
| "tokens_per_second_per_gpu": 385.32 | |
| }, | |
| { | |
| "epoch": 0.034403669724770644, | |
| "grad_norm": 0.1055532768368721, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.1292, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 15, | |
| "tokens_per_second_per_gpu": 453.02 | |
| }, | |
| { | |
| "epoch": 0.03669724770642202, | |
| "grad_norm": 0.10180933028459549, | |
| "learning_rate": 7.142857142857143e-05, | |
| "loss": 0.1208, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 16, | |
| "tokens_per_second_per_gpu": 474.27 | |
| }, | |
| { | |
| "epoch": 0.0389908256880734, | |
| "grad_norm": 0.07999677956104279, | |
| "learning_rate": 7.619047619047618e-05, | |
| "loss": 0.132, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 17, | |
| "tokens_per_second_per_gpu": 382.05 | |
| }, | |
| { | |
| "epoch": 0.04128440366972477, | |
| "grad_norm": 0.09194924682378769, | |
| "learning_rate": 8.095238095238096e-05, | |
| "loss": 0.1067, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 18, | |
| "tokens_per_second_per_gpu": 398.61 | |
| }, | |
| { | |
| "epoch": 0.04357798165137615, | |
| "grad_norm": 0.0931428000330925, | |
| "learning_rate": 8.571428571428571e-05, | |
| "loss": 0.1088, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 19, | |
| "tokens_per_second_per_gpu": 447.07 | |
| }, | |
| { | |
| "epoch": 0.045871559633027525, | |
| "grad_norm": 0.06202042102813721, | |
| "learning_rate": 9.047619047619048e-05, | |
| "loss": 0.0962, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 20, | |
| "tokens_per_second_per_gpu": 382.57 | |
| }, | |
| { | |
| "epoch": 0.0481651376146789, | |
| "grad_norm": 0.04220607504248619, | |
| "learning_rate": 9.523809523809524e-05, | |
| "loss": 0.0963, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 21, | |
| "tokens_per_second_per_gpu": 423.29 | |
| }, | |
| { | |
| "epoch": 0.05045871559633028, | |
| "grad_norm": 0.050066106021404266, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1032, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 22, | |
| "tokens_per_second_per_gpu": 381.35 | |
| }, | |
| { | |
| "epoch": 0.052752293577981654, | |
| "grad_norm": 0.0557384118437767, | |
| "learning_rate": 9.999856734543933e-05, | |
| "loss": 0.1025, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 23, | |
| "tokens_per_second_per_gpu": 393.62 | |
| }, | |
| { | |
| "epoch": 0.05504587155963303, | |
| "grad_norm": 0.04612402245402336, | |
| "learning_rate": 9.999426946385727e-05, | |
| "loss": 0.0985, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 24, | |
| "tokens_per_second_per_gpu": 515.46 | |
| }, | |
| { | |
| "epoch": 0.05733944954128441, | |
| "grad_norm": 0.09721734374761581, | |
| "learning_rate": 9.998710660154898e-05, | |
| "loss": 0.1062, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 25, | |
| "tokens_per_second_per_gpu": 398.15 | |
| }, | |
| { | |
| "epoch": 0.05963302752293578, | |
| "grad_norm": 0.036745935678482056, | |
| "learning_rate": 9.997707916899079e-05, | |
| "loss": 0.1045, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 26, | |
| "tokens_per_second_per_gpu": 422.42 | |
| }, | |
| { | |
| "epoch": 0.06192660550458716, | |
| "grad_norm": 0.04298936203122139, | |
| "learning_rate": 9.996418774081658e-05, | |
| "loss": 0.0923, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 27, | |
| "tokens_per_second_per_gpu": 440.87 | |
| }, | |
| { | |
| "epoch": 0.06422018348623854, | |
| "grad_norm": 0.033536747097969055, | |
| "learning_rate": 9.994843305578486e-05, | |
| "loss": 0.096, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 28, | |
| "tokens_per_second_per_gpu": 370.28 | |
| }, | |
| { | |
| "epoch": 0.06651376146788991, | |
| "grad_norm": 0.03256046772003174, | |
| "learning_rate": 9.99298160167365e-05, | |
| "loss": 0.0832, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 29, | |
| "tokens_per_second_per_gpu": 357.19 | |
| }, | |
| { | |
| "epoch": 0.06880733944954129, | |
| "grad_norm": 0.042709868401288986, | |
| "learning_rate": 9.990833769054293e-05, | |
| "loss": 0.086, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 30, | |
| "tokens_per_second_per_gpu": 441.89 | |
| }, | |
| { | |
| "epoch": 0.07110091743119266, | |
| "grad_norm": 0.04347776621580124, | |
| "learning_rate": 9.988399930804504e-05, | |
| "loss": 0.1, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 31, | |
| "tokens_per_second_per_gpu": 348.66 | |
| }, | |
| { | |
| "epoch": 0.07339449541284404, | |
| "grad_norm": 0.030414681881666183, | |
| "learning_rate": 9.985680226398261e-05, | |
| "loss": 0.0811, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 32, | |
| "tokens_per_second_per_gpu": 435.28 | |
| }, | |
| { | |
| "epoch": 0.07568807339449542, | |
| "grad_norm": 0.034023743122816086, | |
| "learning_rate": 9.98267481169144e-05, | |
| "loss": 0.0743, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 33, | |
| "tokens_per_second_per_gpu": 482.51 | |
| }, | |
| { | |
| "epoch": 0.0779816513761468, | |
| "grad_norm": 0.03136487305164337, | |
| "learning_rate": 9.979383858912885e-05, | |
| "loss": 0.0739, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.08, | |
| "memory/max_allocated (GiB)": 49.08, | |
| "step": 34, | |
| "tokens_per_second_per_gpu": 496.59 | |
| }, | |
| { | |
| "epoch": 0.08027522935779817, | |
| "grad_norm": 0.028108298778533936, | |
| "learning_rate": 9.975807556654537e-05, | |
| "loss": 0.077, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 35, | |
| "tokens_per_second_per_gpu": 349.1 | |
| }, | |
| { | |
| "epoch": 0.08256880733944955, | |
| "grad_norm": 0.028020795434713364, | |
| "learning_rate": 9.971946109860626e-05, | |
| "loss": 0.0775, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 36, | |
| "tokens_per_second_per_gpu": 351.02 | |
| }, | |
| { | |
| "epoch": 0.08486238532110092, | |
| "grad_norm": 0.028756650164723396, | |
| "learning_rate": 9.967799739815925e-05, | |
| "loss": 0.0788, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 37, | |
| "tokens_per_second_per_gpu": 534.52 | |
| }, | |
| { | |
| "epoch": 0.0871559633027523, | |
| "grad_norm": 0.02806459739804268, | |
| "learning_rate": 9.963368684133072e-05, | |
| "loss": 0.0809, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 38, | |
| "tokens_per_second_per_gpu": 367.94 | |
| }, | |
| { | |
| "epoch": 0.08944954128440367, | |
| "grad_norm": 0.02387731708586216, | |
| "learning_rate": 9.958653196738954e-05, | |
| "loss": 0.0642, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 39, | |
| "tokens_per_second_per_gpu": 466.74 | |
| }, | |
| { | |
| "epoch": 0.09174311926605505, | |
| "grad_norm": 0.027889851480722427, | |
| "learning_rate": 9.953653547860151e-05, | |
| "loss": 0.0904, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 40, | |
| "tokens_per_second_per_gpu": 371.51 | |
| }, | |
| { | |
| "epoch": 0.09403669724770643, | |
| "grad_norm": 0.031659577041864395, | |
| "learning_rate": 9.948370024007454e-05, | |
| "loss": 0.081, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 41, | |
| "tokens_per_second_per_gpu": 479.04 | |
| }, | |
| { | |
| "epoch": 0.0963302752293578, | |
| "grad_norm": 0.03186093270778656, | |
| "learning_rate": 9.942802927959443e-05, | |
| "loss": 0.0881, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 42, | |
| "tokens_per_second_per_gpu": 364.73 | |
| }, | |
| { | |
| "epoch": 0.09862385321100918, | |
| "grad_norm": 0.0313677079975605, | |
| "learning_rate": 9.936952578745142e-05, | |
| "loss": 0.0808, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 43, | |
| "tokens_per_second_per_gpu": 418.0 | |
| }, | |
| { | |
| "epoch": 0.10091743119266056, | |
| "grad_norm": 0.0264989472925663, | |
| "learning_rate": 9.93081931162573e-05, | |
| "loss": 0.0664, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 44, | |
| "tokens_per_second_per_gpu": 439.24 | |
| }, | |
| { | |
| "epoch": 0.10321100917431193, | |
| "grad_norm": 0.026272334158420563, | |
| "learning_rate": 9.92440347807533e-05, | |
| "loss": 0.0683, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 45, | |
| "tokens_per_second_per_gpu": 482.81 | |
| }, | |
| { | |
| "epoch": 0.10550458715596331, | |
| "grad_norm": 0.029066840186715126, | |
| "learning_rate": 9.91770544576087e-05, | |
| "loss": 0.0737, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 46, | |
| "tokens_per_second_per_gpu": 389.87 | |
| }, | |
| { | |
| "epoch": 0.10779816513761468, | |
| "grad_norm": 0.024542706087231636, | |
| "learning_rate": 9.910725598521013e-05, | |
| "loss": 0.0737, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 47, | |
| "tokens_per_second_per_gpu": 473.12 | |
| }, | |
| { | |
| "epoch": 0.11009174311926606, | |
| "grad_norm": 0.042941153049468994, | |
| "learning_rate": 9.90346433634416e-05, | |
| "loss": 0.0951, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 48, | |
| "tokens_per_second_per_gpu": 325.12 | |
| }, | |
| { | |
| "epoch": 0.11238532110091744, | |
| "grad_norm": 0.029044413939118385, | |
| "learning_rate": 9.89592207534552e-05, | |
| "loss": 0.0745, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.73, | |
| "memory/max_allocated (GiB)": 48.73, | |
| "step": 49, | |
| "tokens_per_second_per_gpu": 315.62 | |
| }, | |
| { | |
| "epoch": 0.11467889908256881, | |
| "grad_norm": 0.028920788317918777, | |
| "learning_rate": 9.888099247743283e-05, | |
| "loss": 0.0818, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 50, | |
| "tokens_per_second_per_gpu": 441.3 | |
| }, | |
| { | |
| "epoch": 0.11697247706422019, | |
| "grad_norm": 0.026095205917954445, | |
| "learning_rate": 9.879996301833833e-05, | |
| "loss": 0.0688, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 51, | |
| "tokens_per_second_per_gpu": 386.22 | |
| }, | |
| { | |
| "epoch": 0.11926605504587157, | |
| "grad_norm": 0.024823926389217377, | |
| "learning_rate": 9.871613701966067e-05, | |
| "loss": 0.0701, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 52, | |
| "tokens_per_second_per_gpu": 511.32 | |
| }, | |
| { | |
| "epoch": 0.12155963302752294, | |
| "grad_norm": 0.036093298345804214, | |
| "learning_rate": 9.862951928514782e-05, | |
| "loss": 0.0823, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 53, | |
| "tokens_per_second_per_gpu": 323.2 | |
| }, | |
| { | |
| "epoch": 0.12385321100917432, | |
| "grad_norm": 0.03257686272263527, | |
| "learning_rate": 9.854011477853146e-05, | |
| "loss": 0.0769, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 54, | |
| "tokens_per_second_per_gpu": 447.62 | |
| }, | |
| { | |
| "epoch": 0.12614678899082568, | |
| "grad_norm": 0.03413158655166626, | |
| "learning_rate": 9.844792862324258e-05, | |
| "loss": 0.0728, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 55, | |
| "tokens_per_second_per_gpu": 451.05 | |
| }, | |
| { | |
| "epoch": 0.12844036697247707, | |
| "grad_norm": 0.02947932481765747, | |
| "learning_rate": 9.835296610211779e-05, | |
| "loss": 0.0713, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 56, | |
| "tokens_per_second_per_gpu": 457.44 | |
| }, | |
| { | |
| "epoch": 0.13073394495412843, | |
| "grad_norm": 0.0220651775598526, | |
| "learning_rate": 9.825523265709666e-05, | |
| "loss": 0.0607, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 57, | |
| "tokens_per_second_per_gpu": 456.49 | |
| }, | |
| { | |
| "epoch": 0.13302752293577982, | |
| "grad_norm": 0.026394842192530632, | |
| "learning_rate": 9.815473388890983e-05, | |
| "loss": 0.0716, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 58, | |
| "tokens_per_second_per_gpu": 393.95 | |
| }, | |
| { | |
| "epoch": 0.1353211009174312, | |
| "grad_norm": 0.027936838567256927, | |
| "learning_rate": 9.805147555675805e-05, | |
| "loss": 0.0738, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 59, | |
| "tokens_per_second_per_gpu": 464.83 | |
| }, | |
| { | |
| "epoch": 0.13761467889908258, | |
| "grad_norm": 0.023982539772987366, | |
| "learning_rate": 9.794546357798208e-05, | |
| "loss": 0.0608, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 60, | |
| "tokens_per_second_per_gpu": 450.66 | |
| }, | |
| { | |
| "epoch": 0.13990825688073394, | |
| "grad_norm": 0.027479754760861397, | |
| "learning_rate": 9.783670402772379e-05, | |
| "loss": 0.0672, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 61, | |
| "tokens_per_second_per_gpu": 455.94 | |
| }, | |
| { | |
| "epoch": 0.14220183486238533, | |
| "grad_norm": 0.02617599070072174, | |
| "learning_rate": 9.772520313857775e-05, | |
| "loss": 0.0804, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 62, | |
| "tokens_per_second_per_gpu": 394.85 | |
| }, | |
| { | |
| "epoch": 0.1444954128440367, | |
| "grad_norm": 0.030884992331266403, | |
| "learning_rate": 9.761096730023432e-05, | |
| "loss": 0.0768, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 63, | |
| "tokens_per_second_per_gpu": 446.63 | |
| }, | |
| { | |
| "epoch": 0.14678899082568808, | |
| "grad_norm": 0.027579287067055702, | |
| "learning_rate": 9.749400305911322e-05, | |
| "loss": 0.0659, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 64, | |
| "tokens_per_second_per_gpu": 484.34 | |
| }, | |
| { | |
| "epoch": 0.14908256880733944, | |
| "grad_norm": 0.030303625389933586, | |
| "learning_rate": 9.737431711798864e-05, | |
| "loss": 0.0645, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 65, | |
| "tokens_per_second_per_gpu": 437.07 | |
| }, | |
| { | |
| "epoch": 0.15137614678899083, | |
| "grad_norm": 0.027446158230304718, | |
| "learning_rate": 9.725191633560491e-05, | |
| "loss": 0.08, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 66, | |
| "tokens_per_second_per_gpu": 411.5 | |
| }, | |
| { | |
| "epoch": 0.1536697247706422, | |
| "grad_norm": 0.03177177160978317, | |
| "learning_rate": 9.712680772628364e-05, | |
| "loss": 0.0801, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 67, | |
| "tokens_per_second_per_gpu": 429.18 | |
| }, | |
| { | |
| "epoch": 0.1559633027522936, | |
| "grad_norm": 0.0288909412920475, | |
| "learning_rate": 9.69989984595216e-05, | |
| "loss": 0.0707, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 68, | |
| "tokens_per_second_per_gpu": 408.55 | |
| }, | |
| { | |
| "epoch": 0.15825688073394495, | |
| "grad_norm": 0.02751251310110092, | |
| "learning_rate": 9.686849585957994e-05, | |
| "loss": 0.0736, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 69, | |
| "tokens_per_second_per_gpu": 420.0 | |
| }, | |
| { | |
| "epoch": 0.16055045871559634, | |
| "grad_norm": 0.023428168147802353, | |
| "learning_rate": 9.673530740506447e-05, | |
| "loss": 0.0648, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 70, | |
| "tokens_per_second_per_gpu": 512.59 | |
| }, | |
| { | |
| "epoch": 0.1628440366972477, | |
| "grad_norm": 0.031534772366285324, | |
| "learning_rate": 9.659944072849707e-05, | |
| "loss": 0.0818, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 71, | |
| "tokens_per_second_per_gpu": 456.9 | |
| }, | |
| { | |
| "epoch": 0.1651376146788991, | |
| "grad_norm": 0.027208171784877777, | |
| "learning_rate": 9.646090361587827e-05, | |
| "loss": 0.0709, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 72, | |
| "tokens_per_second_per_gpu": 378.48 | |
| }, | |
| { | |
| "epoch": 0.16743119266055045, | |
| "grad_norm": 0.02961639314889908, | |
| "learning_rate": 9.631970400624113e-05, | |
| "loss": 0.0764, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 73, | |
| "tokens_per_second_per_gpu": 316.38 | |
| }, | |
| { | |
| "epoch": 0.16972477064220184, | |
| "grad_norm": 0.027367761358618736, | |
| "learning_rate": 9.617584999119625e-05, | |
| "loss": 0.0672, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 74, | |
| "tokens_per_second_per_gpu": 402.44 | |
| }, | |
| { | |
| "epoch": 0.1720183486238532, | |
| "grad_norm": 0.030167503282427788, | |
| "learning_rate": 9.602934981446803e-05, | |
| "loss": 0.0743, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 75, | |
| "tokens_per_second_per_gpu": 531.29 | |
| }, | |
| { | |
| "epoch": 0.1743119266055046, | |
| "grad_norm": 0.0387263149023056, | |
| "learning_rate": 9.588021187142235e-05, | |
| "loss": 0.083, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 76, | |
| "tokens_per_second_per_gpu": 424.59 | |
| }, | |
| { | |
| "epoch": 0.17660550458715596, | |
| "grad_norm": 0.027617793530225754, | |
| "learning_rate": 9.572844470858537e-05, | |
| "loss": 0.0769, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 77, | |
| "tokens_per_second_per_gpu": 461.9 | |
| }, | |
| { | |
| "epoch": 0.17889908256880735, | |
| "grad_norm": 0.029771512374281883, | |
| "learning_rate": 9.557405702315381e-05, | |
| "loss": 0.0658, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 78, | |
| "tokens_per_second_per_gpu": 475.77 | |
| }, | |
| { | |
| "epoch": 0.1811926605504587, | |
| "grad_norm": 0.029358675703406334, | |
| "learning_rate": 9.541705766249655e-05, | |
| "loss": 0.066, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 79, | |
| "tokens_per_second_per_gpu": 489.33 | |
| }, | |
| { | |
| "epoch": 0.1834862385321101, | |
| "grad_norm": 0.023111771792173386, | |
| "learning_rate": 9.525745562364756e-05, | |
| "loss": 0.066, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 80, | |
| "tokens_per_second_per_gpu": 382.84 | |
| }, | |
| { | |
| "epoch": 0.18577981651376146, | |
| "grad_norm": 0.029448291286826134, | |
| "learning_rate": 9.509526005279044e-05, | |
| "loss": 0.0608, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 81, | |
| "tokens_per_second_per_gpu": 415.81 | |
| }, | |
| { | |
| "epoch": 0.18807339449541285, | |
| "grad_norm": 0.02794116735458374, | |
| "learning_rate": 9.493048024473412e-05, | |
| "loss": 0.0736, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 82, | |
| "tokens_per_second_per_gpu": 400.02 | |
| }, | |
| { | |
| "epoch": 0.19036697247706422, | |
| "grad_norm": 0.04534873738884926, | |
| "learning_rate": 9.476312564238034e-05, | |
| "loss": 0.0673, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 83, | |
| "tokens_per_second_per_gpu": 369.1 | |
| }, | |
| { | |
| "epoch": 0.1926605504587156, | |
| "grad_norm": 0.026540853083133698, | |
| "learning_rate": 9.459320583618252e-05, | |
| "loss": 0.0558, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 84, | |
| "tokens_per_second_per_gpu": 611.61 | |
| }, | |
| { | |
| "epoch": 0.19495412844036697, | |
| "grad_norm": 0.03129403293132782, | |
| "learning_rate": 9.442073056359604e-05, | |
| "loss": 0.0741, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 85, | |
| "tokens_per_second_per_gpu": 492.16 | |
| }, | |
| { | |
| "epoch": 0.19724770642201836, | |
| "grad_norm": 0.027526071295142174, | |
| "learning_rate": 9.424570970852034e-05, | |
| "loss": 0.0733, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 86, | |
| "tokens_per_second_per_gpu": 427.76 | |
| }, | |
| { | |
| "epoch": 0.19954128440366972, | |
| "grad_norm": 0.025468798354268074, | |
| "learning_rate": 9.406815330073244e-05, | |
| "loss": 0.0613, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 87, | |
| "tokens_per_second_per_gpu": 462.82 | |
| }, | |
| { | |
| "epoch": 0.2018348623853211, | |
| "grad_norm": 0.029043635353446007, | |
| "learning_rate": 9.388807151531229e-05, | |
| "loss": 0.0758, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 88, | |
| "tokens_per_second_per_gpu": 353.91 | |
| }, | |
| { | |
| "epoch": 0.20412844036697247, | |
| "grad_norm": 0.03196391835808754, | |
| "learning_rate": 9.37054746720595e-05, | |
| "loss": 0.0678, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 89, | |
| "tokens_per_second_per_gpu": 411.71 | |
| }, | |
| { | |
| "epoch": 0.20642201834862386, | |
| "grad_norm": 0.033272091299295425, | |
| "learning_rate": 9.352037323490208e-05, | |
| "loss": 0.0722, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 90, | |
| "tokens_per_second_per_gpu": 398.81 | |
| }, | |
| { | |
| "epoch": 0.20871559633027523, | |
| "grad_norm": 0.03096090629696846, | |
| "learning_rate": 9.333277781129678e-05, | |
| "loss": 0.0809, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 91, | |
| "tokens_per_second_per_gpu": 393.81 | |
| }, | |
| { | |
| "epoch": 0.21100917431192662, | |
| "grad_norm": 0.026267440989613533, | |
| "learning_rate": 9.314269915162114e-05, | |
| "loss": 0.0604, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 92, | |
| "tokens_per_second_per_gpu": 453.78 | |
| }, | |
| { | |
| "epoch": 0.21330275229357798, | |
| "grad_norm": 0.02608361840248108, | |
| "learning_rate": 9.295014814855753e-05, | |
| "loss": 0.0663, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 93, | |
| "tokens_per_second_per_gpu": 430.47 | |
| }, | |
| { | |
| "epoch": 0.21559633027522937, | |
| "grad_norm": 0.024829065427184105, | |
| "learning_rate": 9.275513583646884e-05, | |
| "loss": 0.0598, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 94, | |
| "tokens_per_second_per_gpu": 384.01 | |
| }, | |
| { | |
| "epoch": 0.21788990825688073, | |
| "grad_norm": 0.03385532647371292, | |
| "learning_rate": 9.255767339076622e-05, | |
| "loss": 0.0719, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 95, | |
| "tokens_per_second_per_gpu": 440.35 | |
| }, | |
| { | |
| "epoch": 0.22018348623853212, | |
| "grad_norm": 0.029608217999339104, | |
| "learning_rate": 9.23577721272686e-05, | |
| "loss": 0.094, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 96, | |
| "tokens_per_second_per_gpu": 485.56 | |
| }, | |
| { | |
| "epoch": 0.22247706422018348, | |
| "grad_norm": 0.02693762816488743, | |
| "learning_rate": 9.215544350155422e-05, | |
| "loss": 0.0755, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 97, | |
| "tokens_per_second_per_gpu": 432.16 | |
| }, | |
| { | |
| "epoch": 0.22477064220183487, | |
| "grad_norm": 0.02771424688398838, | |
| "learning_rate": 9.195069910830427e-05, | |
| "loss": 0.0692, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 98, | |
| "tokens_per_second_per_gpu": 412.93 | |
| }, | |
| { | |
| "epoch": 0.22706422018348624, | |
| "grad_norm": 0.02276022732257843, | |
| "learning_rate": 9.174355068063828e-05, | |
| "loss": 0.0637, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 99, | |
| "tokens_per_second_per_gpu": 418.24 | |
| }, | |
| { | |
| "epoch": 0.22935779816513763, | |
| "grad_norm": 0.026155246421694756, | |
| "learning_rate": 9.15340100894418e-05, | |
| "loss": 0.0698, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 100, | |
| "tokens_per_second_per_gpu": 403.6 | |
| }, | |
| { | |
| "epoch": 0.231651376146789, | |
| "grad_norm": 0.022778436541557312, | |
| "learning_rate": 9.132208934268622e-05, | |
| "loss": 0.0654, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 101, | |
| "tokens_per_second_per_gpu": 491.32 | |
| }, | |
| { | |
| "epoch": 0.23394495412844038, | |
| "grad_norm": 0.04701945558190346, | |
| "learning_rate": 9.110780058474052e-05, | |
| "loss": 0.0741, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 102, | |
| "tokens_per_second_per_gpu": 444.03 | |
| }, | |
| { | |
| "epoch": 0.23623853211009174, | |
| "grad_norm": 0.030211661010980606, | |
| "learning_rate": 9.08911560956753e-05, | |
| "loss": 0.0789, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 103, | |
| "tokens_per_second_per_gpu": 514.87 | |
| }, | |
| { | |
| "epoch": 0.23853211009174313, | |
| "grad_norm": 0.026159459725022316, | |
| "learning_rate": 9.067216829055922e-05, | |
| "loss": 0.0637, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 104, | |
| "tokens_per_second_per_gpu": 446.47 | |
| }, | |
| { | |
| "epoch": 0.2408256880733945, | |
| "grad_norm": 0.02918146923184395, | |
| "learning_rate": 9.045084971874738e-05, | |
| "loss": 0.0727, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 105, | |
| "tokens_per_second_per_gpu": 425.37 | |
| }, | |
| { | |
| "epoch": 0.24311926605504589, | |
| "grad_norm": 0.03170175105333328, | |
| "learning_rate": 9.022721306316222e-05, | |
| "loss": 0.0857, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 106, | |
| "tokens_per_second_per_gpu": 301.79 | |
| }, | |
| { | |
| "epoch": 0.24541284403669725, | |
| "grad_norm": 0.032674651592969894, | |
| "learning_rate": 9.000127113956674e-05, | |
| "loss": 0.0795, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 107, | |
| "tokens_per_second_per_gpu": 338.41 | |
| }, | |
| { | |
| "epoch": 0.24770642201834864, | |
| "grad_norm": 0.026492780074477196, | |
| "learning_rate": 8.977303689583e-05, | |
| "loss": 0.0775, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 108, | |
| "tokens_per_second_per_gpu": 383.35 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.0290480125695467, | |
| "learning_rate": 8.954252341118523e-05, | |
| "loss": 0.076, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 109, | |
| "tokens_per_second_per_gpu": 382.78 | |
| }, | |
| { | |
| "epoch": 0.25229357798165136, | |
| "grad_norm": 0.030473977327346802, | |
| "learning_rate": 8.930974389548023e-05, | |
| "loss": 0.0761, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 110, | |
| "tokens_per_second_per_gpu": 476.56 | |
| }, | |
| { | |
| "epoch": 0.2545871559633027, | |
| "grad_norm": 0.02930077351629734, | |
| "learning_rate": 8.90747116884204e-05, | |
| "loss": 0.0691, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 111, | |
| "tokens_per_second_per_gpu": 441.2 | |
| }, | |
| { | |
| "epoch": 0.25688073394495414, | |
| "grad_norm": 0.02884151227772236, | |
| "learning_rate": 8.883744025880428e-05, | |
| "loss": 0.0806, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 112, | |
| "tokens_per_second_per_gpu": 406.96 | |
| }, | |
| { | |
| "epoch": 0.2591743119266055, | |
| "grad_norm": 0.02618175558745861, | |
| "learning_rate": 8.859794320375168e-05, | |
| "loss": 0.0677, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 113, | |
| "tokens_per_second_per_gpu": 430.04 | |
| }, | |
| { | |
| "epoch": 0.26146788990825687, | |
| "grad_norm": 0.026963548734784126, | |
| "learning_rate": 8.835623424792452e-05, | |
| "loss": 0.0694, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 114, | |
| "tokens_per_second_per_gpu": 351.9 | |
| }, | |
| { | |
| "epoch": 0.26376146788990823, | |
| "grad_norm": 0.021544624119997025, | |
| "learning_rate": 8.811232724274035e-05, | |
| "loss": 0.0613, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 115, | |
| "tokens_per_second_per_gpu": 480.22 | |
| }, | |
| { | |
| "epoch": 0.26605504587155965, | |
| "grad_norm": 0.03840009495615959, | |
| "learning_rate": 8.786623616557847e-05, | |
| "loss": 0.0723, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 116, | |
| "tokens_per_second_per_gpu": 433.18 | |
| }, | |
| { | |
| "epoch": 0.268348623853211, | |
| "grad_norm": 0.022571468725800514, | |
| "learning_rate": 8.761797511897906e-05, | |
| "loss": 0.065, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 117, | |
| "tokens_per_second_per_gpu": 421.92 | |
| }, | |
| { | |
| "epoch": 0.2706422018348624, | |
| "grad_norm": 0.02688576467335224, | |
| "learning_rate": 8.736755832983497e-05, | |
| "loss": 0.0772, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 118, | |
| "tokens_per_second_per_gpu": 354.3 | |
| }, | |
| { | |
| "epoch": 0.27293577981651373, | |
| "grad_norm": 0.025858785957098007, | |
| "learning_rate": 8.711500014857634e-05, | |
| "loss": 0.0745, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 119, | |
| "tokens_per_second_per_gpu": 365.46 | |
| }, | |
| { | |
| "epoch": 0.27522935779816515, | |
| "grad_norm": 0.02718079835176468, | |
| "learning_rate": 8.686031504834843e-05, | |
| "loss": 0.0759, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 120, | |
| "tokens_per_second_per_gpu": 426.06 | |
| }, | |
| { | |
| "epoch": 0.2775229357798165, | |
| "grad_norm": 0.028197383508086205, | |
| "learning_rate": 8.660351762418203e-05, | |
| "loss": 0.0753, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 121, | |
| "tokens_per_second_per_gpu": 483.89 | |
| }, | |
| { | |
| "epoch": 0.2798165137614679, | |
| "grad_norm": 0.02615584433078766, | |
| "learning_rate": 8.634462259215719e-05, | |
| "loss": 0.0692, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 122, | |
| "tokens_per_second_per_gpu": 347.59 | |
| }, | |
| { | |
| "epoch": 0.28211009174311924, | |
| "grad_norm": 0.028645118698477745, | |
| "learning_rate": 8.608364478855983e-05, | |
| "loss": 0.0784, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 123, | |
| "tokens_per_second_per_gpu": 472.02 | |
| }, | |
| { | |
| "epoch": 0.28440366972477066, | |
| "grad_norm": 0.03761473670601845, | |
| "learning_rate": 8.58205991690316e-05, | |
| "loss": 0.0663, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 124, | |
| "tokens_per_second_per_gpu": 439.34 | |
| }, | |
| { | |
| "epoch": 0.286697247706422, | |
| "grad_norm": 0.024080324918031693, | |
| "learning_rate": 8.555550080771273e-05, | |
| "loss": 0.0685, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 125, | |
| "tokens_per_second_per_gpu": 413.4 | |
| }, | |
| { | |
| "epoch": 0.2889908256880734, | |
| "grad_norm": 0.03224342688918114, | |
| "learning_rate": 8.528836489637828e-05, | |
| "loss": 0.0813, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 126, | |
| "tokens_per_second_per_gpu": 299.66 | |
| }, | |
| { | |
| "epoch": 0.29128440366972475, | |
| "grad_norm": 0.02632022649049759, | |
| "learning_rate": 8.501920674356754e-05, | |
| "loss": 0.0649, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 127, | |
| "tokens_per_second_per_gpu": 424.46 | |
| }, | |
| { | |
| "epoch": 0.29357798165137616, | |
| "grad_norm": 0.025439690798521042, | |
| "learning_rate": 8.47480417737067e-05, | |
| "loss": 0.0692, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 128, | |
| "tokens_per_second_per_gpu": 443.94 | |
| }, | |
| { | |
| "epoch": 0.2958715596330275, | |
| "grad_norm": 0.028366245329380035, | |
| "learning_rate": 8.447488552622498e-05, | |
| "loss": 0.0743, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 129, | |
| "tokens_per_second_per_gpu": 392.47 | |
| }, | |
| { | |
| "epoch": 0.2981651376146789, | |
| "grad_norm": 0.028246046975255013, | |
| "learning_rate": 8.419975365466415e-05, | |
| "loss": 0.0693, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 130, | |
| "tokens_per_second_per_gpu": 385.79 | |
| }, | |
| { | |
| "epoch": 0.30045871559633025, | |
| "grad_norm": 0.029451027512550354, | |
| "learning_rate": 8.392266192578143e-05, | |
| "loss": 0.0731, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 131, | |
| "tokens_per_second_per_gpu": 401.98 | |
| }, | |
| { | |
| "epoch": 0.30275229357798167, | |
| "grad_norm": 0.03156789019703865, | |
| "learning_rate": 8.364362621864595e-05, | |
| "loss": 0.0733, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 132, | |
| "tokens_per_second_per_gpu": 406.2 | |
| }, | |
| { | |
| "epoch": 0.30504587155963303, | |
| "grad_norm": 0.0247171763330698, | |
| "learning_rate": 8.336266252372889e-05, | |
| "loss": 0.0723, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 133, | |
| "tokens_per_second_per_gpu": 467.27 | |
| }, | |
| { | |
| "epoch": 0.3073394495412844, | |
| "grad_norm": 0.024775700643658638, | |
| "learning_rate": 8.307978694198699e-05, | |
| "loss": 0.0644, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 134, | |
| "tokens_per_second_per_gpu": 377.14 | |
| }, | |
| { | |
| "epoch": 0.30963302752293576, | |
| "grad_norm": 0.025003118440508842, | |
| "learning_rate": 8.279501568393994e-05, | |
| "loss": 0.0684, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 135, | |
| "tokens_per_second_per_gpu": 368.87 | |
| }, | |
| { | |
| "epoch": 0.3119266055045872, | |
| "grad_norm": 0.028482772409915924, | |
| "learning_rate": 8.250836506874142e-05, | |
| "loss": 0.0705, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 136, | |
| "tokens_per_second_per_gpu": 439.79 | |
| }, | |
| { | |
| "epoch": 0.31422018348623854, | |
| "grad_norm": 0.02605322189629078, | |
| "learning_rate": 8.221985152324385e-05, | |
| "loss": 0.0638, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 137, | |
| "tokens_per_second_per_gpu": 438.9 | |
| }, | |
| { | |
| "epoch": 0.3165137614678899, | |
| "grad_norm": 0.030314577743411064, | |
| "learning_rate": 8.192949158105713e-05, | |
| "loss": 0.0682, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 138, | |
| "tokens_per_second_per_gpu": 355.22 | |
| }, | |
| { | |
| "epoch": 0.31880733944954126, | |
| "grad_norm": 0.02862844057381153, | |
| "learning_rate": 8.163730188160105e-05, | |
| "loss": 0.0764, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 139, | |
| "tokens_per_second_per_gpu": 430.33 | |
| }, | |
| { | |
| "epoch": 0.3211009174311927, | |
| "grad_norm": 0.030885115265846252, | |
| "learning_rate": 8.134329916915184e-05, | |
| "loss": 0.0774, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 140, | |
| "tokens_per_second_per_gpu": 369.87 | |
| }, | |
| { | |
| "epoch": 0.32339449541284404, | |
| "grad_norm": 0.025037452578544617, | |
| "learning_rate": 8.104750029188257e-05, | |
| "loss": 0.0695, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 141, | |
| "tokens_per_second_per_gpu": 538.21 | |
| }, | |
| { | |
| "epoch": 0.3256880733944954, | |
| "grad_norm": 0.02607853338122368, | |
| "learning_rate": 8.074992220089769e-05, | |
| "loss": 0.066, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 142, | |
| "tokens_per_second_per_gpu": 443.91 | |
| }, | |
| { | |
| "epoch": 0.32798165137614677, | |
| "grad_norm": 0.028251491487026215, | |
| "learning_rate": 8.045058194926153e-05, | |
| "loss": 0.0691, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 143, | |
| "tokens_per_second_per_gpu": 403.07 | |
| }, | |
| { | |
| "epoch": 0.3302752293577982, | |
| "grad_norm": 0.02848455123603344, | |
| "learning_rate": 8.014949669102117e-05, | |
| "loss": 0.0712, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 144, | |
| "tokens_per_second_per_gpu": 421.87 | |
| }, | |
| { | |
| "epoch": 0.33256880733944955, | |
| "grad_norm": 0.027499854564666748, | |
| "learning_rate": 7.984668368022335e-05, | |
| "loss": 0.071, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 145, | |
| "tokens_per_second_per_gpu": 310.07 | |
| }, | |
| { | |
| "epoch": 0.3348623853211009, | |
| "grad_norm": 0.05668507516384125, | |
| "learning_rate": 7.954216026992571e-05, | |
| "loss": 0.072, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 146, | |
| "tokens_per_second_per_gpu": 430.94 | |
| }, | |
| { | |
| "epoch": 0.33715596330275227, | |
| "grad_norm": 0.023797793313860893, | |
| "learning_rate": 7.923594391120236e-05, | |
| "loss": 0.0724, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 147, | |
| "tokens_per_second_per_gpu": 506.38 | |
| }, | |
| { | |
| "epoch": 0.3394495412844037, | |
| "grad_norm": 0.03140917047858238, | |
| "learning_rate": 7.892805215214381e-05, | |
| "loss": 0.0707, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 148, | |
| "tokens_per_second_per_gpu": 392.49 | |
| }, | |
| { | |
| "epoch": 0.34174311926605505, | |
| "grad_norm": 0.023651011288166046, | |
| "learning_rate": 7.861850263685134e-05, | |
| "loss": 0.0675, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 149, | |
| "tokens_per_second_per_gpu": 468.39 | |
| }, | |
| { | |
| "epoch": 0.3440366972477064, | |
| "grad_norm": 0.028501421213150024, | |
| "learning_rate": 7.830731310442599e-05, | |
| "loss": 0.0677, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 150, | |
| "tokens_per_second_per_gpu": 377.79 | |
| }, | |
| { | |
| "epoch": 0.3463302752293578, | |
| "grad_norm": 0.028334010392427444, | |
| "learning_rate": 7.799450138795185e-05, | |
| "loss": 0.0749, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 151, | |
| "tokens_per_second_per_gpu": 370.82 | |
| }, | |
| { | |
| "epoch": 0.3486238532110092, | |
| "grad_norm": 0.029713135212659836, | |
| "learning_rate": 7.768008541347423e-05, | |
| "loss": 0.066, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 152, | |
| "tokens_per_second_per_gpu": 403.75 | |
| }, | |
| { | |
| "epoch": 0.35091743119266056, | |
| "grad_norm": 0.030461538583040237, | |
| "learning_rate": 7.73640831989723e-05, | |
| "loss": 0.0667, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 153, | |
| "tokens_per_second_per_gpu": 473.97 | |
| }, | |
| { | |
| "epoch": 0.3532110091743119, | |
| "grad_norm": 0.02694588340818882, | |
| "learning_rate": 7.704651285332663e-05, | |
| "loss": 0.0642, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 154, | |
| "tokens_per_second_per_gpu": 421.0 | |
| }, | |
| { | |
| "epoch": 0.3555045871559633, | |
| "grad_norm": 0.025780972093343735, | |
| "learning_rate": 7.672739257528134e-05, | |
| "loss": 0.0727, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 155, | |
| "tokens_per_second_per_gpu": 507.84 | |
| }, | |
| { | |
| "epoch": 0.3577981651376147, | |
| "grad_norm": 0.027480922639369965, | |
| "learning_rate": 7.640674065240136e-05, | |
| "loss": 0.078, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 156, | |
| "tokens_per_second_per_gpu": 334.0 | |
| }, | |
| { | |
| "epoch": 0.36009174311926606, | |
| "grad_norm": 0.032992683351039886, | |
| "learning_rate": 7.608457546002424e-05, | |
| "loss": 0.0728, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 157, | |
| "tokens_per_second_per_gpu": 315.95 | |
| }, | |
| { | |
| "epoch": 0.3623853211009174, | |
| "grad_norm": 0.029259737581014633, | |
| "learning_rate": 7.576091546020725e-05, | |
| "loss": 0.0721, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 158, | |
| "tokens_per_second_per_gpu": 390.2 | |
| }, | |
| { | |
| "epoch": 0.3646788990825688, | |
| "grad_norm": 0.027205413207411766, | |
| "learning_rate": 7.543577920066944e-05, | |
| "loss": 0.0726, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 159, | |
| "tokens_per_second_per_gpu": 459.84 | |
| }, | |
| { | |
| "epoch": 0.3669724770642202, | |
| "grad_norm": 0.028103800490498543, | |
| "learning_rate": 7.510918531372857e-05, | |
| "loss": 0.0723, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 160, | |
| "tokens_per_second_per_gpu": 362.42 | |
| }, | |
| { | |
| "epoch": 0.36926605504587157, | |
| "grad_norm": 0.025422796607017517, | |
| "learning_rate": 7.478115251523352e-05, | |
| "loss": 0.0651, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 161, | |
| "tokens_per_second_per_gpu": 409.68 | |
| }, | |
| { | |
| "epoch": 0.37155963302752293, | |
| "grad_norm": 0.0247375275939703, | |
| "learning_rate": 7.445169960349167e-05, | |
| "loss": 0.0648, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 162, | |
| "tokens_per_second_per_gpu": 443.52 | |
| }, | |
| { | |
| "epoch": 0.3738532110091743, | |
| "grad_norm": 0.024430420249700546, | |
| "learning_rate": 7.412084545819168e-05, | |
| "loss": 0.0654, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 163, | |
| "tokens_per_second_per_gpu": 439.86 | |
| }, | |
| { | |
| "epoch": 0.3761467889908257, | |
| "grad_norm": 0.02779349498450756, | |
| "learning_rate": 7.378860903932159e-05, | |
| "loss": 0.07, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 164, | |
| "tokens_per_second_per_gpu": 387.88 | |
| }, | |
| { | |
| "epoch": 0.37844036697247707, | |
| "grad_norm": 0.028585737571120262, | |
| "learning_rate": 7.34550093860822e-05, | |
| "loss": 0.0794, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 165, | |
| "tokens_per_second_per_gpu": 469.25 | |
| }, | |
| { | |
| "epoch": 0.38073394495412843, | |
| "grad_norm": 0.028040310367941856, | |
| "learning_rate": 7.31200656157961e-05, | |
| "loss": 0.0702, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 166, | |
| "tokens_per_second_per_gpu": 340.82 | |
| }, | |
| { | |
| "epoch": 0.3830275229357798, | |
| "grad_norm": 0.030313577502965927, | |
| "learning_rate": 7.278379692281208e-05, | |
| "loss": 0.0694, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 167, | |
| "tokens_per_second_per_gpu": 414.21 | |
| }, | |
| { | |
| "epoch": 0.3853211009174312, | |
| "grad_norm": 0.032695479691028595, | |
| "learning_rate": 7.244622257740523e-05, | |
| "loss": 0.0658, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 168, | |
| "tokens_per_second_per_gpu": 435.84 | |
| }, | |
| { | |
| "epoch": 0.3876146788990826, | |
| "grad_norm": 0.02221628651022911, | |
| "learning_rate": 7.210736192467256e-05, | |
| "loss": 0.0596, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 169, | |
| "tokens_per_second_per_gpu": 451.04 | |
| }, | |
| { | |
| "epoch": 0.38990825688073394, | |
| "grad_norm": 0.02417284995317459, | |
| "learning_rate": 7.176723438342446e-05, | |
| "loss": 0.0714, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 170, | |
| "tokens_per_second_per_gpu": 444.02 | |
| }, | |
| { | |
| "epoch": 0.3922018348623853, | |
| "grad_norm": 0.027553344145417213, | |
| "learning_rate": 7.142585944507185e-05, | |
| "loss": 0.0613, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 171, | |
| "tokens_per_second_per_gpu": 436.33 | |
| }, | |
| { | |
| "epoch": 0.3944954128440367, | |
| "grad_norm": 0.028384285047650337, | |
| "learning_rate": 7.10832566725092e-05, | |
| "loss": 0.0634, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 172, | |
| "tokens_per_second_per_gpu": 389.66 | |
| }, | |
| { | |
| "epoch": 0.3967889908256881, | |
| "grad_norm": 0.024850716814398766, | |
| "learning_rate": 7.073944569899354e-05, | |
| "loss": 0.0717, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 173, | |
| "tokens_per_second_per_gpu": 475.5 | |
| }, | |
| { | |
| "epoch": 0.39908256880733944, | |
| "grad_norm": 0.025330083444714546, | |
| "learning_rate": 7.039444622701922e-05, | |
| "loss": 0.0724, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 174, | |
| "tokens_per_second_per_gpu": 383.81 | |
| }, | |
| { | |
| "epoch": 0.4013761467889908, | |
| "grad_norm": 0.025969544425606728, | |
| "learning_rate": 7.00482780271889e-05, | |
| "loss": 0.0712, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 175, | |
| "tokens_per_second_per_gpu": 385.6 | |
| }, | |
| { | |
| "epoch": 0.4036697247706422, | |
| "grad_norm": 0.02731173112988472, | |
| "learning_rate": 6.97009609370806e-05, | |
| "loss": 0.0678, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 176, | |
| "tokens_per_second_per_gpu": 430.61 | |
| }, | |
| { | |
| "epoch": 0.4059633027522936, | |
| "grad_norm": 0.028133299201726913, | |
| "learning_rate": 6.935251486011087e-05, | |
| "loss": 0.061, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 177, | |
| "tokens_per_second_per_gpu": 379.64 | |
| }, | |
| { | |
| "epoch": 0.40825688073394495, | |
| "grad_norm": 0.02273411862552166, | |
| "learning_rate": 6.900295976439413e-05, | |
| "loss": 0.0604, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 178, | |
| "tokens_per_second_per_gpu": 393.24 | |
| }, | |
| { | |
| "epoch": 0.4105504587155963, | |
| "grad_norm": 0.025121403858065605, | |
| "learning_rate": 6.865231568159846e-05, | |
| "loss": 0.0697, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 179, | |
| "tokens_per_second_per_gpu": 453.6 | |
| }, | |
| { | |
| "epoch": 0.41284403669724773, | |
| "grad_norm": 0.029893774539232254, | |
| "learning_rate": 6.830060270579768e-05, | |
| "loss": 0.0743, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 180, | |
| "tokens_per_second_per_gpu": 402.38 | |
| }, | |
| { | |
| "epoch": 0.4151376146788991, | |
| "grad_norm": 0.026196127757430077, | |
| "learning_rate": 6.794784099231972e-05, | |
| "loss": 0.0653, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 181, | |
| "tokens_per_second_per_gpu": 369.19 | |
| }, | |
| { | |
| "epoch": 0.41743119266055045, | |
| "grad_norm": 0.03042738139629364, | |
| "learning_rate": 6.759405075659166e-05, | |
| "loss": 0.0654, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 182, | |
| "tokens_per_second_per_gpu": 389.74 | |
| }, | |
| { | |
| "epoch": 0.4197247706422018, | |
| "grad_norm": 0.02454569563269615, | |
| "learning_rate": 6.723925227298132e-05, | |
| "loss": 0.0648, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 183, | |
| "tokens_per_second_per_gpu": 383.9 | |
| }, | |
| { | |
| "epoch": 0.42201834862385323, | |
| "grad_norm": 0.03029336780309677, | |
| "learning_rate": 6.688346587363533e-05, | |
| "loss": 0.0711, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 184, | |
| "tokens_per_second_per_gpu": 436.44 | |
| }, | |
| { | |
| "epoch": 0.4243119266055046, | |
| "grad_norm": 0.02716301940381527, | |
| "learning_rate": 6.652671194731396e-05, | |
| "loss": 0.0638, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 185, | |
| "tokens_per_second_per_gpu": 405.73 | |
| }, | |
| { | |
| "epoch": 0.42660550458715596, | |
| "grad_norm": 0.030476156622171402, | |
| "learning_rate": 6.616901093822283e-05, | |
| "loss": 0.0742, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 186, | |
| "tokens_per_second_per_gpu": 417.15 | |
| }, | |
| { | |
| "epoch": 0.4288990825688073, | |
| "grad_norm": 0.024246055632829666, | |
| "learning_rate": 6.58103833448412e-05, | |
| "loss": 0.0606, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 187, | |
| "tokens_per_second_per_gpu": 418.65 | |
| }, | |
| { | |
| "epoch": 0.43119266055045874, | |
| "grad_norm": 0.025659549981355667, | |
| "learning_rate": 6.545084971874738e-05, | |
| "loss": 0.0643, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 188, | |
| "tokens_per_second_per_gpu": 524.48 | |
| }, | |
| { | |
| "epoch": 0.4334862385321101, | |
| "grad_norm": 0.02851368486881256, | |
| "learning_rate": 6.509043066344092e-05, | |
| "loss": 0.0728, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 189, | |
| "tokens_per_second_per_gpu": 470.95 | |
| }, | |
| { | |
| "epoch": 0.43577981651376146, | |
| "grad_norm": 0.03035641275346279, | |
| "learning_rate": 6.472914683316195e-05, | |
| "loss": 0.0797, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 190, | |
| "tokens_per_second_per_gpu": 409.73 | |
| }, | |
| { | |
| "epoch": 0.4380733944954128, | |
| "grad_norm": 0.026916082948446274, | |
| "learning_rate": 6.436701893170756e-05, | |
| "loss": 0.06, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 191, | |
| "tokens_per_second_per_gpu": 424.58 | |
| }, | |
| { | |
| "epoch": 0.44036697247706424, | |
| "grad_norm": 0.035412922501564026, | |
| "learning_rate": 6.400406771124536e-05, | |
| "loss": 0.0699, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 192, | |
| "tokens_per_second_per_gpu": 372.44 | |
| }, | |
| { | |
| "epoch": 0.4426605504587156, | |
| "grad_norm": 0.02869465760886669, | |
| "learning_rate": 6.364031397112416e-05, | |
| "loss": 0.0709, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 193, | |
| "tokens_per_second_per_gpu": 411.07 | |
| }, | |
| { | |
| "epoch": 0.44495412844036697, | |
| "grad_norm": 0.02998914197087288, | |
| "learning_rate": 6.327577855668216e-05, | |
| "loss": 0.0693, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 194, | |
| "tokens_per_second_per_gpu": 473.81 | |
| }, | |
| { | |
| "epoch": 0.44724770642201833, | |
| "grad_norm": 0.029111091047525406, | |
| "learning_rate": 6.291048235805234e-05, | |
| "loss": 0.0789, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 195, | |
| "tokens_per_second_per_gpu": 393.48 | |
| }, | |
| { | |
| "epoch": 0.44954128440366975, | |
| "grad_norm": 0.028819169849157333, | |
| "learning_rate": 6.254444630896529e-05, | |
| "loss": 0.0738, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 196, | |
| "tokens_per_second_per_gpu": 339.21 | |
| }, | |
| { | |
| "epoch": 0.4518348623853211, | |
| "grad_norm": 0.027091829106211662, | |
| "learning_rate": 6.21776913855496e-05, | |
| "loss": 0.0606, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 197, | |
| "tokens_per_second_per_gpu": 490.05 | |
| }, | |
| { | |
| "epoch": 0.4541284403669725, | |
| "grad_norm": 0.023907724767923355, | |
| "learning_rate": 6.181023860512984e-05, | |
| "loss": 0.0664, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 198, | |
| "tokens_per_second_per_gpu": 437.98 | |
| }, | |
| { | |
| "epoch": 0.45642201834862384, | |
| "grad_norm": 0.026607749983668327, | |
| "learning_rate": 6.144210902502207e-05, | |
| "loss": 0.0686, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 199, | |
| "tokens_per_second_per_gpu": 518.9 | |
| }, | |
| { | |
| "epoch": 0.45871559633027525, | |
| "grad_norm": 0.028734847903251648, | |
| "learning_rate": 6.107332374132715e-05, | |
| "loss": 0.0709, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 200, | |
| "tokens_per_second_per_gpu": 448.6 | |
| }, | |
| { | |
| "epoch": 0.4610091743119266, | |
| "grad_norm": 0.027956590056419373, | |
| "learning_rate": 6.0703903887721837e-05, | |
| "loss": 0.0645, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 201, | |
| "tokens_per_second_per_gpu": 450.75 | |
| }, | |
| { | |
| "epoch": 0.463302752293578, | |
| "grad_norm": 0.02955472283065319, | |
| "learning_rate": 6.0333870634247645e-05, | |
| "loss": 0.0749, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 202, | |
| "tokens_per_second_per_gpu": 366.38 | |
| }, | |
| { | |
| "epoch": 0.46559633027522934, | |
| "grad_norm": 0.033545345067977905, | |
| "learning_rate": 5.9963245186097725e-05, | |
| "loss": 0.0714, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 203, | |
| "tokens_per_second_per_gpu": 409.9 | |
| }, | |
| { | |
| "epoch": 0.46788990825688076, | |
| "grad_norm": 0.027358222752809525, | |
| "learning_rate": 5.95920487824016e-05, | |
| "loss": 0.0632, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 204, | |
| "tokens_per_second_per_gpu": 409.18 | |
| }, | |
| { | |
| "epoch": 0.4701834862385321, | |
| "grad_norm": 0.026303566992282867, | |
| "learning_rate": 5.922030269500809e-05, | |
| "loss": 0.0621, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 205, | |
| "tokens_per_second_per_gpu": 344.97 | |
| }, | |
| { | |
| "epoch": 0.4724770642201835, | |
| "grad_norm": 0.023472387343645096, | |
| "learning_rate": 5.8848028227266325e-05, | |
| "loss": 0.0642, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 206, | |
| "tokens_per_second_per_gpu": 458.3 | |
| }, | |
| { | |
| "epoch": 0.47477064220183485, | |
| "grad_norm": 0.02930634468793869, | |
| "learning_rate": 5.847524671280484e-05, | |
| "loss": 0.07, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 207, | |
| "tokens_per_second_per_gpu": 386.88 | |
| }, | |
| { | |
| "epoch": 0.47706422018348627, | |
| "grad_norm": 0.02035793662071228, | |
| "learning_rate": 5.810197951430911e-05, | |
| "loss": 0.0558, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 208, | |
| "tokens_per_second_per_gpu": 479.37 | |
| }, | |
| { | |
| "epoch": 0.4793577981651376, | |
| "grad_norm": 0.027948010712862015, | |
| "learning_rate": 5.772824802229733e-05, | |
| "loss": 0.07, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 209, | |
| "tokens_per_second_per_gpu": 352.97 | |
| }, | |
| { | |
| "epoch": 0.481651376146789, | |
| "grad_norm": 0.027743425220251083, | |
| "learning_rate": 5.735407365389453e-05, | |
| "loss": 0.0686, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 210, | |
| "tokens_per_second_per_gpu": 419.65 | |
| }, | |
| { | |
| "epoch": 0.48394495412844035, | |
| "grad_norm": 0.03574339672923088, | |
| "learning_rate": 5.697947785160532e-05, | |
| "loss": 0.0593, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 211, | |
| "tokens_per_second_per_gpu": 391.99 | |
| }, | |
| { | |
| "epoch": 0.48623853211009177, | |
| "grad_norm": 0.03303733468055725, | |
| "learning_rate": 5.660448208208513e-05, | |
| "loss": 0.0615, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 212, | |
| "tokens_per_second_per_gpu": 420.47 | |
| }, | |
| { | |
| "epoch": 0.48853211009174313, | |
| "grad_norm": 0.030316850170493126, | |
| "learning_rate": 5.622910783490988e-05, | |
| "loss": 0.0745, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 213, | |
| "tokens_per_second_per_gpu": 379.16 | |
| }, | |
| { | |
| "epoch": 0.4908256880733945, | |
| "grad_norm": 0.031506236642599106, | |
| "learning_rate": 5.585337662134471e-05, | |
| "loss": 0.0724, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 214, | |
| "tokens_per_second_per_gpu": 376.6 | |
| }, | |
| { | |
| "epoch": 0.49311926605504586, | |
| "grad_norm": 0.025807412341237068, | |
| "learning_rate": 5.5477309973111046e-05, | |
| "loss": 0.0628, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 215, | |
| "tokens_per_second_per_gpu": 386.77 | |
| }, | |
| { | |
| "epoch": 0.4954128440366973, | |
| "grad_norm": 0.02294624038040638, | |
| "learning_rate": 5.510092944115286e-05, | |
| "loss": 0.0629, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 216, | |
| "tokens_per_second_per_gpu": 473.64 | |
| }, | |
| { | |
| "epoch": 0.49770642201834864, | |
| "grad_norm": 0.027048619464039803, | |
| "learning_rate": 5.472425659440157e-05, | |
| "loss": 0.0675, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 217, | |
| "tokens_per_second_per_gpu": 374.21 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.026564767584204674, | |
| "learning_rate": 5.4347313018540056e-05, | |
| "loss": 0.0697, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 218, | |
| "tokens_per_second_per_gpu": 442.12 | |
| }, | |
| { | |
| "epoch": 0.5022935779816514, | |
| "grad_norm": 0.03516434505581856, | |
| "learning_rate": 5.397012031476562e-05, | |
| "loss": 0.082, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 219, | |
| "tokens_per_second_per_gpu": 380.84 | |
| }, | |
| { | |
| "epoch": 0.5045871559633027, | |
| "grad_norm": 0.021558105945587158, | |
| "learning_rate": 5.359270009855216e-05, | |
| "loss": 0.0585, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 220, | |
| "tokens_per_second_per_gpu": 509.31 | |
| }, | |
| { | |
| "epoch": 0.5068807339449541, | |
| "grad_norm": 0.024724913761019707, | |
| "learning_rate": 5.321507399841148e-05, | |
| "loss": 0.0632, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 221, | |
| "tokens_per_second_per_gpu": 438.7 | |
| }, | |
| { | |
| "epoch": 0.5091743119266054, | |
| "grad_norm": 0.02698579616844654, | |
| "learning_rate": 5.2837263654653715e-05, | |
| "loss": 0.0715, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 222, | |
| "tokens_per_second_per_gpu": 337.92 | |
| }, | |
| { | |
| "epoch": 0.5114678899082569, | |
| "grad_norm": 0.03043169341981411, | |
| "learning_rate": 5.2459290718147344e-05, | |
| "loss": 0.0755, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 223, | |
| "tokens_per_second_per_gpu": 485.96 | |
| }, | |
| { | |
| "epoch": 0.5137614678899083, | |
| "grad_norm": 0.026405537500977516, | |
| "learning_rate": 5.2081176849078464e-05, | |
| "loss": 0.0641, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 224, | |
| "tokens_per_second_per_gpu": 434.97 | |
| }, | |
| { | |
| "epoch": 0.5160550458715596, | |
| "grad_norm": 0.024269182235002518, | |
| "learning_rate": 5.170294371570939e-05, | |
| "loss": 0.0666, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 225, | |
| "tokens_per_second_per_gpu": 399.27 | |
| }, | |
| { | |
| "epoch": 0.518348623853211, | |
| "grad_norm": 0.03496242314577103, | |
| "learning_rate": 5.132461299313709e-05, | |
| "loss": 0.073, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 226, | |
| "tokens_per_second_per_gpu": 422.84 | |
| }, | |
| { | |
| "epoch": 0.5206422018348624, | |
| "grad_norm": 0.029179584234952927, | |
| "learning_rate": 5.094620636205095e-05, | |
| "loss": 0.0697, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 227, | |
| "tokens_per_second_per_gpu": 357.38 | |
| }, | |
| { | |
| "epoch": 0.5229357798165137, | |
| "grad_norm": 0.027006233111023903, | |
| "learning_rate": 5.056774550749043e-05, | |
| "loss": 0.0614, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 228, | |
| "tokens_per_second_per_gpu": 316.93 | |
| }, | |
| { | |
| "epoch": 0.5252293577981652, | |
| "grad_norm": 0.028260482475161552, | |
| "learning_rate": 5.018925211760227e-05, | |
| "loss": 0.0634, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 229, | |
| "tokens_per_second_per_gpu": 417.85 | |
| }, | |
| { | |
| "epoch": 0.5275229357798165, | |
| "grad_norm": 0.025130394846200943, | |
| "learning_rate": 4.981074788239773e-05, | |
| "loss": 0.0588, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 230, | |
| "tokens_per_second_per_gpu": 413.46 | |
| }, | |
| { | |
| "epoch": 0.5298165137614679, | |
| "grad_norm": 0.025551561266183853, | |
| "learning_rate": 4.943225449250958e-05, | |
| "loss": 0.0688, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 231, | |
| "tokens_per_second_per_gpu": 445.27 | |
| }, | |
| { | |
| "epoch": 0.5321100917431193, | |
| "grad_norm": 0.028664810582995415, | |
| "learning_rate": 4.9053793637949067e-05, | |
| "loss": 0.0689, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 232, | |
| "tokens_per_second_per_gpu": 395.88 | |
| }, | |
| { | |
| "epoch": 0.5344036697247706, | |
| "grad_norm": 0.02686873823404312, | |
| "learning_rate": 4.8675387006862914e-05, | |
| "loss": 0.0656, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 233, | |
| "tokens_per_second_per_gpu": 544.1 | |
| }, | |
| { | |
| "epoch": 0.536697247706422, | |
| "grad_norm": 0.03144492581486702, | |
| "learning_rate": 4.829705628429061e-05, | |
| "loss": 0.0795, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 234, | |
| "tokens_per_second_per_gpu": 356.41 | |
| }, | |
| { | |
| "epoch": 0.5389908256880734, | |
| "grad_norm": 0.02188139036297798, | |
| "learning_rate": 4.7918823150921555e-05, | |
| "loss": 0.0611, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 235, | |
| "tokens_per_second_per_gpu": 368.54 | |
| }, | |
| { | |
| "epoch": 0.5412844036697247, | |
| "grad_norm": 0.02784140035510063, | |
| "learning_rate": 4.754070928185266e-05, | |
| "loss": 0.0604, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 236, | |
| "tokens_per_second_per_gpu": 445.84 | |
| }, | |
| { | |
| "epoch": 0.5435779816513762, | |
| "grad_norm": 0.02372545376420021, | |
| "learning_rate": 4.7162736345346303e-05, | |
| "loss": 0.0604, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 237, | |
| "tokens_per_second_per_gpu": 467.85 | |
| }, | |
| { | |
| "epoch": 0.5458715596330275, | |
| "grad_norm": 0.03274843469262123, | |
| "learning_rate": 4.6784926001588544e-05, | |
| "loss": 0.0817, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 238, | |
| "tokens_per_second_per_gpu": 438.54 | |
| }, | |
| { | |
| "epoch": 0.5481651376146789, | |
| "grad_norm": 0.02551015093922615, | |
| "learning_rate": 4.640729990144784e-05, | |
| "loss": 0.0631, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 239, | |
| "tokens_per_second_per_gpu": 486.44 | |
| }, | |
| { | |
| "epoch": 0.5504587155963303, | |
| "grad_norm": 0.04315930977463722, | |
| "learning_rate": 4.6029879685234395e-05, | |
| "loss": 0.0661, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 240, | |
| "tokens_per_second_per_gpu": 450.6 | |
| }, | |
| { | |
| "epoch": 0.5527522935779816, | |
| "grad_norm": 0.024066558107733727, | |
| "learning_rate": 4.565268698145997e-05, | |
| "loss": 0.0612, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 241, | |
| "tokens_per_second_per_gpu": 462.93 | |
| }, | |
| { | |
| "epoch": 0.555045871559633, | |
| "grad_norm": 0.026846949011087418, | |
| "learning_rate": 4.527574340559844e-05, | |
| "loss": 0.0754, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 242, | |
| "tokens_per_second_per_gpu": 392.01 | |
| }, | |
| { | |
| "epoch": 0.5573394495412844, | |
| "grad_norm": 0.02346811629831791, | |
| "learning_rate": 4.4899070558847154e-05, | |
| "loss": 0.0675, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 243, | |
| "tokens_per_second_per_gpu": 468.19 | |
| }, | |
| { | |
| "epoch": 0.5596330275229358, | |
| "grad_norm": 0.02288683131337166, | |
| "learning_rate": 4.452269002688897e-05, | |
| "loss": 0.064, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 244, | |
| "tokens_per_second_per_gpu": 306.21 | |
| }, | |
| { | |
| "epoch": 0.5619266055045872, | |
| "grad_norm": 0.0288680586963892, | |
| "learning_rate": 4.4146623378655296e-05, | |
| "loss": 0.0677, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.73, | |
| "memory/max_allocated (GiB)": 48.73, | |
| "step": 245, | |
| "tokens_per_second_per_gpu": 325.4 | |
| }, | |
| { | |
| "epoch": 0.5642201834862385, | |
| "grad_norm": 0.02450747601687908, | |
| "learning_rate": 4.3770892165090126e-05, | |
| "loss": 0.0638, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 246, | |
| "tokens_per_second_per_gpu": 401.5 | |
| }, | |
| { | |
| "epoch": 0.5665137614678899, | |
| "grad_norm": 0.028074199333786964, | |
| "learning_rate": 4.3395517917914895e-05, | |
| "loss": 0.0615, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 247, | |
| "tokens_per_second_per_gpu": 537.03 | |
| }, | |
| { | |
| "epoch": 0.5688073394495413, | |
| "grad_norm": 0.02514073997735977, | |
| "learning_rate": 4.3020522148394676e-05, | |
| "loss": 0.0669, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 248, | |
| "tokens_per_second_per_gpu": 409.93 | |
| }, | |
| { | |
| "epoch": 0.5711009174311926, | |
| "grad_norm": 0.029449012130498886, | |
| "learning_rate": 4.2645926346105484e-05, | |
| "loss": 0.0711, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 249, | |
| "tokens_per_second_per_gpu": 344.6 | |
| }, | |
| { | |
| "epoch": 0.573394495412844, | |
| "grad_norm": 0.024152036756277084, | |
| "learning_rate": 4.22717519777027e-05, | |
| "loss": 0.0652, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 250, | |
| "tokens_per_second_per_gpu": 417.48 | |
| }, | |
| { | |
| "epoch": 0.5756880733944955, | |
| "grad_norm": 0.02781221643090248, | |
| "learning_rate": 4.189802048569089e-05, | |
| "loss": 0.0598, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 251, | |
| "tokens_per_second_per_gpu": 477.01 | |
| }, | |
| { | |
| "epoch": 0.5779816513761468, | |
| "grad_norm": 0.02137266844511032, | |
| "learning_rate": 4.1524753287195165e-05, | |
| "loss": 0.0584, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 252, | |
| "tokens_per_second_per_gpu": 475.28 | |
| }, | |
| { | |
| "epoch": 0.5802752293577982, | |
| "grad_norm": 0.03145367652177811, | |
| "learning_rate": 4.1151971772733686e-05, | |
| "loss": 0.0742, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 253, | |
| "tokens_per_second_per_gpu": 416.81 | |
| }, | |
| { | |
| "epoch": 0.5825688073394495, | |
| "grad_norm": 0.026259735226631165, | |
| "learning_rate": 4.07796973049919e-05, | |
| "loss": 0.0704, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 254, | |
| "tokens_per_second_per_gpu": 432.14 | |
| }, | |
| { | |
| "epoch": 0.5848623853211009, | |
| "grad_norm": 0.029704980552196503, | |
| "learning_rate": 4.04079512175984e-05, | |
| "loss": 0.0751, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 255, | |
| "tokens_per_second_per_gpu": 368.81 | |
| }, | |
| { | |
| "epoch": 0.5871559633027523, | |
| "grad_norm": 0.037060242146253586, | |
| "learning_rate": 4.003675481390228e-05, | |
| "loss": 0.081, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 256, | |
| "tokens_per_second_per_gpu": 400.19 | |
| }, | |
| { | |
| "epoch": 0.5894495412844036, | |
| "grad_norm": 0.027513017877936363, | |
| "learning_rate": 3.966612936575235e-05, | |
| "loss": 0.0597, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 257, | |
| "tokens_per_second_per_gpu": 381.21 | |
| }, | |
| { | |
| "epoch": 0.591743119266055, | |
| "grad_norm": 0.037167515605688095, | |
| "learning_rate": 3.929609611227817e-05, | |
| "loss": 0.0639, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 258, | |
| "tokens_per_second_per_gpu": 357.36 | |
| }, | |
| { | |
| "epoch": 0.5940366972477065, | |
| "grad_norm": 0.0229306872934103, | |
| "learning_rate": 3.8926676258672866e-05, | |
| "loss": 0.0626, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 259, | |
| "tokens_per_second_per_gpu": 387.68 | |
| }, | |
| { | |
| "epoch": 0.5963302752293578, | |
| "grad_norm": 0.027137834578752518, | |
| "learning_rate": 3.855789097497794e-05, | |
| "loss": 0.0711, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 260, | |
| "tokens_per_second_per_gpu": 377.29 | |
| }, | |
| { | |
| "epoch": 0.5986238532110092, | |
| "grad_norm": 0.027339540421962738, | |
| "learning_rate": 3.818976139487017e-05, | |
| "loss": 0.0644, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 261, | |
| "tokens_per_second_per_gpu": 476.61 | |
| }, | |
| { | |
| "epoch": 0.6009174311926605, | |
| "grad_norm": 0.02739766612648964, | |
| "learning_rate": 3.7822308614450406e-05, | |
| "loss": 0.0711, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 262, | |
| "tokens_per_second_per_gpu": 426.96 | |
| }, | |
| { | |
| "epoch": 0.6032110091743119, | |
| "grad_norm": 0.02805398218333721, | |
| "learning_rate": 3.745555369103471e-05, | |
| "loss": 0.0669, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 263, | |
| "tokens_per_second_per_gpu": 363.5 | |
| }, | |
| { | |
| "epoch": 0.6055045871559633, | |
| "grad_norm": 0.03466130048036575, | |
| "learning_rate": 3.708951764194767e-05, | |
| "loss": 0.0771, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 264, | |
| "tokens_per_second_per_gpu": 383.75 | |
| }, | |
| { | |
| "epoch": 0.6077981651376146, | |
| "grad_norm": 0.02684733085334301, | |
| "learning_rate": 3.6724221443317855e-05, | |
| "loss": 0.0613, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 265, | |
| "tokens_per_second_per_gpu": 545.2 | |
| }, | |
| { | |
| "epoch": 0.6100917431192661, | |
| "grad_norm": 0.025042880326509476, | |
| "learning_rate": 3.635968602887585e-05, | |
| "loss": 0.0706, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 266, | |
| "tokens_per_second_per_gpu": 420.58 | |
| }, | |
| { | |
| "epoch": 0.6123853211009175, | |
| "grad_norm": 0.02610246278345585, | |
| "learning_rate": 3.599593228875465e-05, | |
| "loss": 0.0749, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 267, | |
| "tokens_per_second_per_gpu": 443.84 | |
| }, | |
| { | |
| "epoch": 0.6146788990825688, | |
| "grad_norm": 0.02343624271452427, | |
| "learning_rate": 3.563298106829244e-05, | |
| "loss": 0.0676, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 268, | |
| "tokens_per_second_per_gpu": 409.89 | |
| }, | |
| { | |
| "epoch": 0.6169724770642202, | |
| "grad_norm": 0.02438695915043354, | |
| "learning_rate": 3.527085316683805e-05, | |
| "loss": 0.0648, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 269, | |
| "tokens_per_second_per_gpu": 475.38 | |
| }, | |
| { | |
| "epoch": 0.6192660550458715, | |
| "grad_norm": 0.02070113644003868, | |
| "learning_rate": 3.490956933655909e-05, | |
| "loss": 0.0605, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 270, | |
| "tokens_per_second_per_gpu": 341.3 | |
| }, | |
| { | |
| "epoch": 0.6215596330275229, | |
| "grad_norm": 0.03797437623143196, | |
| "learning_rate": 3.4549150281252636e-05, | |
| "loss": 0.0674, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 271, | |
| "tokens_per_second_per_gpu": 391.97 | |
| }, | |
| { | |
| "epoch": 0.6238532110091743, | |
| "grad_norm": 0.02536945417523384, | |
| "learning_rate": 3.41896166551588e-05, | |
| "loss": 0.0649, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 272, | |
| "tokens_per_second_per_gpu": 461.4 | |
| }, | |
| { | |
| "epoch": 0.6261467889908257, | |
| "grad_norm": 0.032918062061071396, | |
| "learning_rate": 3.383098906177719e-05, | |
| "loss": 0.0769, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 273, | |
| "tokens_per_second_per_gpu": 495.12 | |
| }, | |
| { | |
| "epoch": 0.6284403669724771, | |
| "grad_norm": 0.03230955824255943, | |
| "learning_rate": 3.347328805268605e-05, | |
| "loss": 0.0687, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 274, | |
| "tokens_per_second_per_gpu": 355.59 | |
| }, | |
| { | |
| "epoch": 0.6307339449541285, | |
| "grad_norm": 0.045344047248363495, | |
| "learning_rate": 3.3116534126364685e-05, | |
| "loss": 0.0748, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 275, | |
| "tokens_per_second_per_gpu": 339.05 | |
| }, | |
| { | |
| "epoch": 0.6330275229357798, | |
| "grad_norm": 0.021811284124851227, | |
| "learning_rate": 3.2760747727018694e-05, | |
| "loss": 0.0646, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 276, | |
| "tokens_per_second_per_gpu": 334.53 | |
| }, | |
| { | |
| "epoch": 0.6353211009174312, | |
| "grad_norm": 0.02648971416056156, | |
| "learning_rate": 3.240594924340835e-05, | |
| "loss": 0.068, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 277, | |
| "tokens_per_second_per_gpu": 375.76 | |
| }, | |
| { | |
| "epoch": 0.6376146788990825, | |
| "grad_norm": 0.022893795743584633, | |
| "learning_rate": 3.205215900768029e-05, | |
| "loss": 0.0627, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 278, | |
| "tokens_per_second_per_gpu": 412.8 | |
| }, | |
| { | |
| "epoch": 0.6399082568807339, | |
| "grad_norm": 0.027191977947950363, | |
| "learning_rate": 3.169939729420233e-05, | |
| "loss": 0.0632, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 279, | |
| "tokens_per_second_per_gpu": 408.91 | |
| }, | |
| { | |
| "epoch": 0.6422018348623854, | |
| "grad_norm": 0.023182721808552742, | |
| "learning_rate": 3.1347684318401536e-05, | |
| "loss": 0.0631, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 280, | |
| "tokens_per_second_per_gpu": 434.99 | |
| }, | |
| { | |
| "epoch": 0.6444954128440367, | |
| "grad_norm": 0.03368153050541878, | |
| "learning_rate": 3.099704023560587e-05, | |
| "loss": 0.0762, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 281, | |
| "tokens_per_second_per_gpu": 393.93 | |
| }, | |
| { | |
| "epoch": 0.6467889908256881, | |
| "grad_norm": 0.023287048563361168, | |
| "learning_rate": 3.0647485139889145e-05, | |
| "loss": 0.0629, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 282, | |
| "tokens_per_second_per_gpu": 335.84 | |
| }, | |
| { | |
| "epoch": 0.6490825688073395, | |
| "grad_norm": 0.027626749128103256, | |
| "learning_rate": 3.0299039062919416e-05, | |
| "loss": 0.0631, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 283, | |
| "tokens_per_second_per_gpu": 446.72 | |
| }, | |
| { | |
| "epoch": 0.6513761467889908, | |
| "grad_norm": 0.02671007066965103, | |
| "learning_rate": 2.995172197281113e-05, | |
| "loss": 0.0684, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 284, | |
| "tokens_per_second_per_gpu": 419.33 | |
| }, | |
| { | |
| "epoch": 0.6536697247706422, | |
| "grad_norm": 0.026775743812322617, | |
| "learning_rate": 2.96055537729808e-05, | |
| "loss": 0.063, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 285, | |
| "tokens_per_second_per_gpu": 456.33 | |
| }, | |
| { | |
| "epoch": 0.6559633027522935, | |
| "grad_norm": 0.024690093472599983, | |
| "learning_rate": 2.926055430100647e-05, | |
| "loss": 0.0601, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 286, | |
| "tokens_per_second_per_gpu": 363.46 | |
| }, | |
| { | |
| "epoch": 0.658256880733945, | |
| "grad_norm": 0.021927161142230034, | |
| "learning_rate": 2.8916743327490803e-05, | |
| "loss": 0.0598, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 287, | |
| "tokens_per_second_per_gpu": 395.18 | |
| }, | |
| { | |
| "epoch": 0.6605504587155964, | |
| "grad_norm": 0.029110578820109367, | |
| "learning_rate": 2.8574140554928175e-05, | |
| "loss": 0.0732, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 288, | |
| "tokens_per_second_per_gpu": 395.82 | |
| }, | |
| { | |
| "epoch": 0.6628440366972477, | |
| "grad_norm": 0.025474051013588905, | |
| "learning_rate": 2.8232765616575563e-05, | |
| "loss": 0.0674, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 289, | |
| "tokens_per_second_per_gpu": 435.52 | |
| }, | |
| { | |
| "epoch": 0.6651376146788991, | |
| "grad_norm": 0.02178235538303852, | |
| "learning_rate": 2.789263807532746e-05, | |
| "loss": 0.0616, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 290, | |
| "tokens_per_second_per_gpu": 442.39 | |
| }, | |
| { | |
| "epoch": 0.6674311926605505, | |
| "grad_norm": 0.023412682116031647, | |
| "learning_rate": 2.7553777422594774e-05, | |
| "loss": 0.0673, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 291, | |
| "tokens_per_second_per_gpu": 412.59 | |
| }, | |
| { | |
| "epoch": 0.6697247706422018, | |
| "grad_norm": 0.023469222709536552, | |
| "learning_rate": 2.721620307718793e-05, | |
| "loss": 0.0682, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 292, | |
| "tokens_per_second_per_gpu": 276.54 | |
| }, | |
| { | |
| "epoch": 0.6720183486238532, | |
| "grad_norm": 0.03131282329559326, | |
| "learning_rate": 2.687993438420392e-05, | |
| "loss": 0.0647, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 293, | |
| "tokens_per_second_per_gpu": 392.4 | |
| }, | |
| { | |
| "epoch": 0.6743119266055045, | |
| "grad_norm": 0.02991569973528385, | |
| "learning_rate": 2.65449906139178e-05, | |
| "loss": 0.0681, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 294, | |
| "tokens_per_second_per_gpu": 377.34 | |
| }, | |
| { | |
| "epoch": 0.676605504587156, | |
| "grad_norm": 0.02651585452258587, | |
| "learning_rate": 2.6211390960678413e-05, | |
| "loss": 0.0802, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 295, | |
| "tokens_per_second_per_gpu": 358.61 | |
| }, | |
| { | |
| "epoch": 0.6788990825688074, | |
| "grad_norm": 0.022964881733059883, | |
| "learning_rate": 2.5879154541808337e-05, | |
| "loss": 0.0643, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 296, | |
| "tokens_per_second_per_gpu": 484.52 | |
| }, | |
| { | |
| "epoch": 0.6811926605504587, | |
| "grad_norm": 0.028967639431357384, | |
| "learning_rate": 2.554830039650834e-05, | |
| "loss": 0.0632, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 297, | |
| "tokens_per_second_per_gpu": 440.4 | |
| }, | |
| { | |
| "epoch": 0.6834862385321101, | |
| "grad_norm": 0.02948296256363392, | |
| "learning_rate": 2.5218847484766495e-05, | |
| "loss": 0.0752, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 298, | |
| "tokens_per_second_per_gpu": 288.6 | |
| }, | |
| { | |
| "epoch": 0.6857798165137615, | |
| "grad_norm": 0.03220253810286522, | |
| "learning_rate": 2.4890814686271448e-05, | |
| "loss": 0.0634, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 299, | |
| "tokens_per_second_per_gpu": 447.81 | |
| }, | |
| { | |
| "epoch": 0.6880733944954128, | |
| "grad_norm": 0.028979238122701645, | |
| "learning_rate": 2.456422079933056e-05, | |
| "loss": 0.0689, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 300, | |
| "tokens_per_second_per_gpu": 458.5 | |
| }, | |
| { | |
| "epoch": 0.6903669724770642, | |
| "grad_norm": 0.024549167603254318, | |
| "learning_rate": 2.4239084539792745e-05, | |
| "loss": 0.0593, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 301, | |
| "tokens_per_second_per_gpu": 419.65 | |
| }, | |
| { | |
| "epoch": 0.6926605504587156, | |
| "grad_norm": 0.02671237848699093, | |
| "learning_rate": 2.391542453997578e-05, | |
| "loss": 0.0657, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 302, | |
| "tokens_per_second_per_gpu": 368.27 | |
| }, | |
| { | |
| "epoch": 0.694954128440367, | |
| "grad_norm": 0.03672722727060318, | |
| "learning_rate": 2.3593259347598657e-05, | |
| "loss": 0.0535, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 303, | |
| "tokens_per_second_per_gpu": 474.85 | |
| }, | |
| { | |
| "epoch": 0.6972477064220184, | |
| "grad_norm": 0.03666655346751213, | |
| "learning_rate": 2.3272607424718675e-05, | |
| "loss": 0.0646, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 304, | |
| "tokens_per_second_per_gpu": 393.88 | |
| }, | |
| { | |
| "epoch": 0.6995412844036697, | |
| "grad_norm": 0.025117024779319763, | |
| "learning_rate": 2.29534871466734e-05, | |
| "loss": 0.0699, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 305, | |
| "tokens_per_second_per_gpu": 449.5 | |
| }, | |
| { | |
| "epoch": 0.7018348623853211, | |
| "grad_norm": 0.035403817892074585, | |
| "learning_rate": 2.2635916801027706e-05, | |
| "loss": 0.0769, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 306, | |
| "tokens_per_second_per_gpu": 420.33 | |
| }, | |
| { | |
| "epoch": 0.7041284403669725, | |
| "grad_norm": 0.026707297191023827, | |
| "learning_rate": 2.2319914586525777e-05, | |
| "loss": 0.0633, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 307, | |
| "tokens_per_second_per_gpu": 451.77 | |
| }, | |
| { | |
| "epoch": 0.7064220183486238, | |
| "grad_norm": 0.02504413016140461, | |
| "learning_rate": 2.2005498612048155e-05, | |
| "loss": 0.0597, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 308, | |
| "tokens_per_second_per_gpu": 357.06 | |
| }, | |
| { | |
| "epoch": 0.7087155963302753, | |
| "grad_norm": 0.02307130955159664, | |
| "learning_rate": 2.1692686895574005e-05, | |
| "loss": 0.064, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 309, | |
| "tokens_per_second_per_gpu": 474.84 | |
| }, | |
| { | |
| "epoch": 0.7110091743119266, | |
| "grad_norm": 0.026173440739512444, | |
| "learning_rate": 2.1381497363148673e-05, | |
| "loss": 0.063, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 310, | |
| "tokens_per_second_per_gpu": 403.88 | |
| }, | |
| { | |
| "epoch": 0.713302752293578, | |
| "grad_norm": 0.027350088581442833, | |
| "learning_rate": 2.1071947847856222e-05, | |
| "loss": 0.0674, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 311, | |
| "tokens_per_second_per_gpu": 409.62 | |
| }, | |
| { | |
| "epoch": 0.7155963302752294, | |
| "grad_norm": 0.02530243620276451, | |
| "learning_rate": 2.0764056088797645e-05, | |
| "loss": 0.063, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 312, | |
| "tokens_per_second_per_gpu": 385.83 | |
| }, | |
| { | |
| "epoch": 0.7178899082568807, | |
| "grad_norm": 0.028018414974212646, | |
| "learning_rate": 2.045783973007429e-05, | |
| "loss": 0.0634, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 313, | |
| "tokens_per_second_per_gpu": 395.65 | |
| }, | |
| { | |
| "epoch": 0.7201834862385321, | |
| "grad_norm": 0.02613895572721958, | |
| "learning_rate": 2.0153316319776662e-05, | |
| "loss": 0.0653, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 314, | |
| "tokens_per_second_per_gpu": 357.1 | |
| }, | |
| { | |
| "epoch": 0.7224770642201835, | |
| "grad_norm": 0.026048416271805763, | |
| "learning_rate": 1.985050330897883e-05, | |
| "loss": 0.0644, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 315, | |
| "tokens_per_second_per_gpu": 395.0 | |
| }, | |
| { | |
| "epoch": 0.7247706422018348, | |
| "grad_norm": 0.030031291767954826, | |
| "learning_rate": 1.954941805073848e-05, | |
| "loss": 0.078, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 316, | |
| "tokens_per_second_per_gpu": 372.79 | |
| }, | |
| { | |
| "epoch": 0.7270642201834863, | |
| "grad_norm": 0.029979195445775986, | |
| "learning_rate": 1.9250077799102322e-05, | |
| "loss": 0.0651, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 317, | |
| "tokens_per_second_per_gpu": 438.54 | |
| }, | |
| { | |
| "epoch": 0.7293577981651376, | |
| "grad_norm": 0.025628041476011276, | |
| "learning_rate": 1.8952499708117432e-05, | |
| "loss": 0.0669, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 318, | |
| "tokens_per_second_per_gpu": 474.63 | |
| }, | |
| { | |
| "epoch": 0.731651376146789, | |
| "grad_norm": 0.024868648499250412, | |
| "learning_rate": 1.8656700830848174e-05, | |
| "loss": 0.0656, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 319, | |
| "tokens_per_second_per_gpu": 445.15 | |
| }, | |
| { | |
| "epoch": 0.7339449541284404, | |
| "grad_norm": 0.024810567498207092, | |
| "learning_rate": 1.8362698118398967e-05, | |
| "loss": 0.064, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 320, | |
| "tokens_per_second_per_gpu": 383.63 | |
| }, | |
| { | |
| "epoch": 0.7362385321100917, | |
| "grad_norm": 0.02743346616625786, | |
| "learning_rate": 1.8070508418942876e-05, | |
| "loss": 0.0758, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 321, | |
| "tokens_per_second_per_gpu": 386.17 | |
| }, | |
| { | |
| "epoch": 0.7385321100917431, | |
| "grad_norm": 0.028884073719382286, | |
| "learning_rate": 1.7780148476756147e-05, | |
| "loss": 0.0675, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 322, | |
| "tokens_per_second_per_gpu": 498.58 | |
| }, | |
| { | |
| "epoch": 0.7408256880733946, | |
| "grad_norm": 0.028301537036895752, | |
| "learning_rate": 1.7491634931258587e-05, | |
| "loss": 0.0734, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 323, | |
| "tokens_per_second_per_gpu": 392.47 | |
| }, | |
| { | |
| "epoch": 0.7431192660550459, | |
| "grad_norm": 0.02405114285647869, | |
| "learning_rate": 1.7204984316060063e-05, | |
| "loss": 0.0538, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 324, | |
| "tokens_per_second_per_gpu": 409.24 | |
| }, | |
| { | |
| "epoch": 0.7454128440366973, | |
| "grad_norm": 0.029399245977401733, | |
| "learning_rate": 1.6920213058013022e-05, | |
| "loss": 0.0693, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.08, | |
| "memory/max_allocated (GiB)": 49.08, | |
| "step": 325, | |
| "tokens_per_second_per_gpu": 461.77 | |
| }, | |
| { | |
| "epoch": 0.7477064220183486, | |
| "grad_norm": 0.02802177332341671, | |
| "learning_rate": 1.6637337476271124e-05, | |
| "loss": 0.0647, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 326, | |
| "tokens_per_second_per_gpu": 389.28 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.024391207844018936, | |
| "learning_rate": 1.6356373781354058e-05, | |
| "loss": 0.066, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 327, | |
| "tokens_per_second_per_gpu": 376.51 | |
| }, | |
| { | |
| "epoch": 0.7522935779816514, | |
| "grad_norm": 0.02589585818350315, | |
| "learning_rate": 1.6077338074218596e-05, | |
| "loss": 0.0676, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 328, | |
| "tokens_per_second_per_gpu": 422.1 | |
| }, | |
| { | |
| "epoch": 0.7545871559633027, | |
| "grad_norm": 0.022877002134919167, | |
| "learning_rate": 1.580024634533587e-05, | |
| "loss": 0.0653, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 329, | |
| "tokens_per_second_per_gpu": 440.68 | |
| }, | |
| { | |
| "epoch": 0.7568807339449541, | |
| "grad_norm": 0.029319310560822487, | |
| "learning_rate": 1.5525114473775014e-05, | |
| "loss": 0.0871, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 330, | |
| "tokens_per_second_per_gpu": 435.63 | |
| }, | |
| { | |
| "epoch": 0.7591743119266054, | |
| "grad_norm": 0.03219328075647354, | |
| "learning_rate": 1.5251958226293306e-05, | |
| "loss": 0.0801, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 331, | |
| "tokens_per_second_per_gpu": 363.82 | |
| }, | |
| { | |
| "epoch": 0.7614678899082569, | |
| "grad_norm": 0.024657782167196274, | |
| "learning_rate": 1.4980793256432474e-05, | |
| "loss": 0.0622, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 332, | |
| "tokens_per_second_per_gpu": 342.94 | |
| }, | |
| { | |
| "epoch": 0.7637614678899083, | |
| "grad_norm": 0.03142733871936798, | |
| "learning_rate": 1.4711635103621719e-05, | |
| "loss": 0.0681, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 333, | |
| "tokens_per_second_per_gpu": 404.61 | |
| }, | |
| { | |
| "epoch": 0.7660550458715596, | |
| "grad_norm": 0.026000676676630974, | |
| "learning_rate": 1.4444499192287275e-05, | |
| "loss": 0.065, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 334, | |
| "tokens_per_second_per_gpu": 367.91 | |
| }, | |
| { | |
| "epoch": 0.768348623853211, | |
| "grad_norm": 0.03227536380290985, | |
| "learning_rate": 1.4179400830968415e-05, | |
| "loss": 0.0767, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 335, | |
| "tokens_per_second_per_gpu": 314.19 | |
| }, | |
| { | |
| "epoch": 0.7706422018348624, | |
| "grad_norm": 0.025221284478902817, | |
| "learning_rate": 1.3916355211440164e-05, | |
| "loss": 0.0645, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 336, | |
| "tokens_per_second_per_gpu": 362.08 | |
| }, | |
| { | |
| "epoch": 0.7729357798165137, | |
| "grad_norm": 0.030213654041290283, | |
| "learning_rate": 1.3655377407842812e-05, | |
| "loss": 0.066, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 337, | |
| "tokens_per_second_per_gpu": 466.08 | |
| }, | |
| { | |
| "epoch": 0.7752293577981652, | |
| "grad_norm": 0.026164716109633446, | |
| "learning_rate": 1.3396482375817975e-05, | |
| "loss": 0.0656, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 338, | |
| "tokens_per_second_per_gpu": 458.34 | |
| }, | |
| { | |
| "epoch": 0.7775229357798165, | |
| "grad_norm": 0.0265730619430542, | |
| "learning_rate": 1.3139684951651588e-05, | |
| "loss": 0.0636, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 339, | |
| "tokens_per_second_per_gpu": 399.93 | |
| }, | |
| { | |
| "epoch": 0.7798165137614679, | |
| "grad_norm": 0.026285763829946518, | |
| "learning_rate": 1.2884999851423673e-05, | |
| "loss": 0.0682, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 340, | |
| "tokens_per_second_per_gpu": 421.4 | |
| }, | |
| { | |
| "epoch": 0.7821100917431193, | |
| "grad_norm": 0.023802319541573524, | |
| "learning_rate": 1.2632441670165056e-05, | |
| "loss": 0.0641, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.08, | |
| "memory/max_allocated (GiB)": 49.08, | |
| "step": 341, | |
| "tokens_per_second_per_gpu": 439.55 | |
| }, | |
| { | |
| "epoch": 0.7844036697247706, | |
| "grad_norm": 0.024973031133413315, | |
| "learning_rate": 1.2382024881020937e-05, | |
| "loss": 0.0615, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 342, | |
| "tokens_per_second_per_gpu": 492.26 | |
| }, | |
| { | |
| "epoch": 0.786697247706422, | |
| "grad_norm": 0.029818380251526833, | |
| "learning_rate": 1.213376383442153e-05, | |
| "loss": 0.0746, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 343, | |
| "tokens_per_second_per_gpu": 394.15 | |
| }, | |
| { | |
| "epoch": 0.7889908256880734, | |
| "grad_norm": 0.028851691633462906, | |
| "learning_rate": 1.188767275725966e-05, | |
| "loss": 0.0744, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 344, | |
| "tokens_per_second_per_gpu": 422.33 | |
| }, | |
| { | |
| "epoch": 0.7912844036697247, | |
| "grad_norm": 0.03523954004049301, | |
| "learning_rate": 1.164376575207547e-05, | |
| "loss": 0.077, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 345, | |
| "tokens_per_second_per_gpu": 286.3 | |
| }, | |
| { | |
| "epoch": 0.7935779816513762, | |
| "grad_norm": 0.023627813905477524, | |
| "learning_rate": 1.140205679624834e-05, | |
| "loss": 0.0641, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 346, | |
| "tokens_per_second_per_gpu": 351.44 | |
| }, | |
| { | |
| "epoch": 0.7958715596330275, | |
| "grad_norm": 0.026164906099438667, | |
| "learning_rate": 1.1162559741195733e-05, | |
| "loss": 0.0658, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 347, | |
| "tokens_per_second_per_gpu": 389.38 | |
| }, | |
| { | |
| "epoch": 0.7981651376146789, | |
| "grad_norm": 0.023336883634328842, | |
| "learning_rate": 1.092528831157959e-05, | |
| "loss": 0.062, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 348, | |
| "tokens_per_second_per_gpu": 472.15 | |
| }, | |
| { | |
| "epoch": 0.8004587155963303, | |
| "grad_norm": 0.02306864783167839, | |
| "learning_rate": 1.0690256104519764e-05, | |
| "loss": 0.0629, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 349, | |
| "tokens_per_second_per_gpu": 422.0 | |
| }, | |
| { | |
| "epoch": 0.8027522935779816, | |
| "grad_norm": 0.026163572445511818, | |
| "learning_rate": 1.0457476588814774e-05, | |
| "loss": 0.0667, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 350, | |
| "tokens_per_second_per_gpu": 389.59 | |
| }, | |
| { | |
| "epoch": 0.805045871559633, | |
| "grad_norm": 0.024867909029126167, | |
| "learning_rate": 1.0226963104170002e-05, | |
| "loss": 0.0674, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 351, | |
| "tokens_per_second_per_gpu": 429.91 | |
| }, | |
| { | |
| "epoch": 0.8073394495412844, | |
| "grad_norm": 0.023188138380646706, | |
| "learning_rate": 9.998728860433276e-06, | |
| "loss": 0.0645, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 352, | |
| "tokens_per_second_per_gpu": 388.46 | |
| }, | |
| { | |
| "epoch": 0.8096330275229358, | |
| "grad_norm": 0.03035775013267994, | |
| "learning_rate": 9.772786936837785e-06, | |
| "loss": 0.0707, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 353, | |
| "tokens_per_second_per_gpu": 397.77 | |
| }, | |
| { | |
| "epoch": 0.8119266055045872, | |
| "grad_norm": 0.04821021109819412, | |
| "learning_rate": 9.549150281252633e-06, | |
| "loss": 0.0646, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 354, | |
| "tokens_per_second_per_gpu": 445.4 | |
| }, | |
| { | |
| "epoch": 0.8142201834862385, | |
| "grad_norm": 0.030557144433259964, | |
| "learning_rate": 9.327831709440792e-06, | |
| "loss": 0.0659, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 355, | |
| "tokens_per_second_per_gpu": 382.05 | |
| }, | |
| { | |
| "epoch": 0.8165137614678899, | |
| "grad_norm": 0.02662436105310917, | |
| "learning_rate": 9.108843904324715e-06, | |
| "loss": 0.0626, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 356, | |
| "tokens_per_second_per_gpu": 412.62 | |
| }, | |
| { | |
| "epoch": 0.8188073394495413, | |
| "grad_norm": 0.027914568781852722, | |
| "learning_rate": 8.8921994152595e-06, | |
| "loss": 0.0681, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 357, | |
| "tokens_per_second_per_gpu": 297.66 | |
| }, | |
| { | |
| "epoch": 0.8211009174311926, | |
| "grad_norm": 0.027242561802268028, | |
| "learning_rate": 8.677910657313782e-06, | |
| "loss": 0.067, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 358, | |
| "tokens_per_second_per_gpu": 457.91 | |
| }, | |
| { | |
| "epoch": 0.823394495412844, | |
| "grad_norm": 0.030475802719593048, | |
| "learning_rate": 8.465989910558209e-06, | |
| "loss": 0.0689, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 359, | |
| "tokens_per_second_per_gpu": 368.2 | |
| }, | |
| { | |
| "epoch": 0.8256880733944955, | |
| "grad_norm": 0.028360676020383835, | |
| "learning_rate": 8.256449319361748e-06, | |
| "loss": 0.0687, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 360, | |
| "tokens_per_second_per_gpu": 384.64 | |
| }, | |
| { | |
| "epoch": 0.8279816513761468, | |
| "grad_norm": 0.031053343787789345, | |
| "learning_rate": 8.049300891695744e-06, | |
| "loss": 0.0754, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 361, | |
| "tokens_per_second_per_gpu": 320.92 | |
| }, | |
| { | |
| "epoch": 0.8302752293577982, | |
| "grad_norm": 0.030271202325820923, | |
| "learning_rate": 7.844556498445788e-06, | |
| "loss": 0.072, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 362, | |
| "tokens_per_second_per_gpu": 437.12 | |
| }, | |
| { | |
| "epoch": 0.8325688073394495, | |
| "grad_norm": 0.027202172204852104, | |
| "learning_rate": 7.642227872731417e-06, | |
| "loss": 0.0696, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 363, | |
| "tokens_per_second_per_gpu": 332.31 | |
| }, | |
| { | |
| "epoch": 0.8348623853211009, | |
| "grad_norm": 0.02677847445011139, | |
| "learning_rate": 7.4423266092337855e-06, | |
| "loss": 0.0703, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 364, | |
| "tokens_per_second_per_gpu": 364.39 | |
| }, | |
| { | |
| "epoch": 0.8371559633027523, | |
| "grad_norm": 0.0259072408080101, | |
| "learning_rate": 7.244864163531162e-06, | |
| "loss": 0.0678, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 365, | |
| "tokens_per_second_per_gpu": 367.02 | |
| }, | |
| { | |
| "epoch": 0.8394495412844036, | |
| "grad_norm": 0.02673807553946972, | |
| "learning_rate": 7.049851851442468e-06, | |
| "loss": 0.0661, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 366, | |
| "tokens_per_second_per_gpu": 475.35 | |
| }, | |
| { | |
| "epoch": 0.841743119266055, | |
| "grad_norm": 0.027974814176559448, | |
| "learning_rate": 6.857300848378856e-06, | |
| "loss": 0.0747, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 367, | |
| "tokens_per_second_per_gpu": 409.23 | |
| }, | |
| { | |
| "epoch": 0.8440366972477065, | |
| "grad_norm": 0.022259563207626343, | |
| "learning_rate": 6.667222188703226e-06, | |
| "loss": 0.064, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 368, | |
| "tokens_per_second_per_gpu": 440.59 | |
| }, | |
| { | |
| "epoch": 0.8463302752293578, | |
| "grad_norm": 0.02939799055457115, | |
| "learning_rate": 6.479626765097918e-06, | |
| "loss": 0.0693, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 369, | |
| "tokens_per_second_per_gpu": 455.83 | |
| }, | |
| { | |
| "epoch": 0.8486238532110092, | |
| "grad_norm": 0.029195845127105713, | |
| "learning_rate": 6.294525327940515e-06, | |
| "loss": 0.0711, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 370, | |
| "tokens_per_second_per_gpu": 394.89 | |
| }, | |
| { | |
| "epoch": 0.8509174311926605, | |
| "grad_norm": 0.0236493106931448, | |
| "learning_rate": 6.111928484687723e-06, | |
| "loss": 0.0643, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 371, | |
| "tokens_per_second_per_gpu": 408.68 | |
| }, | |
| { | |
| "epoch": 0.8532110091743119, | |
| "grad_norm": 0.02727104350924492, | |
| "learning_rate": 5.931846699267557e-06, | |
| "loss": 0.067, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 372, | |
| "tokens_per_second_per_gpu": 509.27 | |
| }, | |
| { | |
| "epoch": 0.8555045871559633, | |
| "grad_norm": 0.034410908818244934, | |
| "learning_rate": 5.7542902914796745e-06, | |
| "loss": 0.0624, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 373, | |
| "tokens_per_second_per_gpu": 556.2 | |
| }, | |
| { | |
| "epoch": 0.8577981651376146, | |
| "grad_norm": 0.0287538543343544, | |
| "learning_rate": 5.579269436403967e-06, | |
| "loss": 0.0651, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.08, | |
| "memory/max_allocated (GiB)": 49.08, | |
| "step": 374, | |
| "tokens_per_second_per_gpu": 381.61 | |
| }, | |
| { | |
| "epoch": 0.8600917431192661, | |
| "grad_norm": 0.02870243228971958, | |
| "learning_rate": 5.4067941638174806e-06, | |
| "loss": 0.0731, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 375, | |
| "tokens_per_second_per_gpu": 361.13 | |
| }, | |
| { | |
| "epoch": 0.8623853211009175, | |
| "grad_norm": 0.026416806504130363, | |
| "learning_rate": 5.2368743576196536e-06, | |
| "loss": 0.064, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 376, | |
| "tokens_per_second_per_gpu": 326.97 | |
| }, | |
| { | |
| "epoch": 0.8646788990825688, | |
| "grad_norm": 0.023003704845905304, | |
| "learning_rate": 5.0695197552659e-06, | |
| "loss": 0.0625, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 377, | |
| "tokens_per_second_per_gpu": 438.76 | |
| }, | |
| { | |
| "epoch": 0.8669724770642202, | |
| "grad_norm": 0.037476420402526855, | |
| "learning_rate": 4.9047399472095746e-06, | |
| "loss": 0.0697, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 378, | |
| "tokens_per_second_per_gpu": 345.81 | |
| }, | |
| { | |
| "epoch": 0.8692660550458715, | |
| "grad_norm": 0.02971925400197506, | |
| "learning_rate": 4.742544376352443e-06, | |
| "loss": 0.0663, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 379, | |
| "tokens_per_second_per_gpu": 436.62 | |
| }, | |
| { | |
| "epoch": 0.8715596330275229, | |
| "grad_norm": 0.023713113740086555, | |
| "learning_rate": 4.582942337503465e-06, | |
| "loss": 0.0602, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 380, | |
| "tokens_per_second_per_gpu": 448.46 | |
| }, | |
| { | |
| "epoch": 0.8738532110091743, | |
| "grad_norm": 0.02941006049513817, | |
| "learning_rate": 4.425942976846187e-06, | |
| "loss": 0.0725, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 381, | |
| "tokens_per_second_per_gpu": 329.17 | |
| }, | |
| { | |
| "epoch": 0.8761467889908257, | |
| "grad_norm": 0.028299743309617043, | |
| "learning_rate": 4.271555291414636e-06, | |
| "loss": 0.072, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 382, | |
| "tokens_per_second_per_gpu": 340.56 | |
| }, | |
| { | |
| "epoch": 0.8784403669724771, | |
| "grad_norm": 0.03180241584777832, | |
| "learning_rate": 4.119788128577667e-06, | |
| "loss": 0.0766, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 383, | |
| "tokens_per_second_per_gpu": 446.49 | |
| }, | |
| { | |
| "epoch": 0.8807339449541285, | |
| "grad_norm": 0.026926379650831223, | |
| "learning_rate": 3.9706501855319765e-06, | |
| "loss": 0.0683, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 384, | |
| "tokens_per_second_per_gpu": 440.01 | |
| }, | |
| { | |
| "epoch": 0.8830275229357798, | |
| "grad_norm": 0.03347824513912201, | |
| "learning_rate": 3.824150008803767e-06, | |
| "loss": 0.0751, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 385, | |
| "tokens_per_second_per_gpu": 343.0 | |
| }, | |
| { | |
| "epoch": 0.8853211009174312, | |
| "grad_norm": 0.030953101813793182, | |
| "learning_rate": 3.680295993758881e-06, | |
| "loss": 0.0689, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 386, | |
| "tokens_per_second_per_gpu": 393.96 | |
| }, | |
| { | |
| "epoch": 0.8876146788990825, | |
| "grad_norm": 0.032475098967552185, | |
| "learning_rate": 3.539096384121743e-06, | |
| "loss": 0.0828, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 387, | |
| "tokens_per_second_per_gpu": 378.9 | |
| }, | |
| { | |
| "epoch": 0.8899082568807339, | |
| "grad_norm": 0.02490062825381756, | |
| "learning_rate": 3.40055927150294e-06, | |
| "loss": 0.0623, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 388, | |
| "tokens_per_second_per_gpu": 408.26 | |
| }, | |
| { | |
| "epoch": 0.8922018348623854, | |
| "grad_norm": 0.02600006014108658, | |
| "learning_rate": 3.2646925949355312e-06, | |
| "loss": 0.0658, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 389, | |
| "tokens_per_second_per_gpu": 446.25 | |
| }, | |
| { | |
| "epoch": 0.8944954128440367, | |
| "grad_norm": 0.024244820699095726, | |
| "learning_rate": 3.1315041404200663e-06, | |
| "loss": 0.0655, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 390, | |
| "tokens_per_second_per_gpu": 420.39 | |
| }, | |
| { | |
| "epoch": 0.8967889908256881, | |
| "grad_norm": 0.0253219585865736, | |
| "learning_rate": 3.00100154047841e-06, | |
| "loss": 0.0674, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 391, | |
| "tokens_per_second_per_gpu": 463.53 | |
| }, | |
| { | |
| "epoch": 0.8990825688073395, | |
| "grad_norm": 0.027757421135902405, | |
| "learning_rate": 2.8731922737163685e-06, | |
| "loss": 0.0681, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 392, | |
| "tokens_per_second_per_gpu": 472.68 | |
| }, | |
| { | |
| "epoch": 0.9013761467889908, | |
| "grad_norm": 0.02381259575486183, | |
| "learning_rate": 2.7480836643950956e-06, | |
| "loss": 0.0596, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.08, | |
| "memory/max_allocated (GiB)": 49.08, | |
| "step": 393, | |
| "tokens_per_second_per_gpu": 452.37 | |
| }, | |
| { | |
| "epoch": 0.9036697247706422, | |
| "grad_norm": 0.024906722828745842, | |
| "learning_rate": 2.6256828820113766e-06, | |
| "loss": 0.0669, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 394, | |
| "tokens_per_second_per_gpu": 327.71 | |
| }, | |
| { | |
| "epoch": 0.9059633027522935, | |
| "grad_norm": 0.025515113025903702, | |
| "learning_rate": 2.5059969408867843e-06, | |
| "loss": 0.0636, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 395, | |
| "tokens_per_second_per_gpu": 409.3 | |
| }, | |
| { | |
| "epoch": 0.908256880733945, | |
| "grad_norm": 0.026188403367996216, | |
| "learning_rate": 2.3890326997656975e-06, | |
| "loss": 0.0688, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 396, | |
| "tokens_per_second_per_gpu": 371.81 | |
| }, | |
| { | |
| "epoch": 0.9105504587155964, | |
| "grad_norm": 0.027840575203299522, | |
| "learning_rate": 2.274796861422246e-06, | |
| "loss": 0.0737, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 397, | |
| "tokens_per_second_per_gpu": 447.44 | |
| }, | |
| { | |
| "epoch": 0.9128440366972477, | |
| "grad_norm": 0.0268483255058527, | |
| "learning_rate": 2.163295972276219e-06, | |
| "loss": 0.0583, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 398, | |
| "tokens_per_second_per_gpu": 383.02 | |
| }, | |
| { | |
| "epoch": 0.9151376146788991, | |
| "grad_norm": 0.027824856340885162, | |
| "learning_rate": 2.054536422017922e-06, | |
| "loss": 0.0767, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 399, | |
| "tokens_per_second_per_gpu": 331.96 | |
| }, | |
| { | |
| "epoch": 0.9174311926605505, | |
| "grad_norm": 0.024313461035490036, | |
| "learning_rate": 1.9485244432419667e-06, | |
| "loss": 0.0694, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 400, | |
| "tokens_per_second_per_gpu": 371.17 | |
| }, | |
| { | |
| "epoch": 0.9197247706422018, | |
| "grad_norm": 0.02038564346730709, | |
| "learning_rate": 1.8452661110901715e-06, | |
| "loss": 0.0563, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 401, | |
| "tokens_per_second_per_gpu": 474.9 | |
| }, | |
| { | |
| "epoch": 0.9220183486238532, | |
| "grad_norm": 0.030249858275055885, | |
| "learning_rate": 1.7447673429033362e-06, | |
| "loss": 0.0685, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 402, | |
| "tokens_per_second_per_gpu": 324.48 | |
| }, | |
| { | |
| "epoch": 0.9243119266055045, | |
| "grad_norm": 0.027523530647158623, | |
| "learning_rate": 1.6470338978822108e-06, | |
| "loss": 0.0666, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 403, | |
| "tokens_per_second_per_gpu": 405.82 | |
| }, | |
| { | |
| "epoch": 0.926605504587156, | |
| "grad_norm": 0.026385333389043808, | |
| "learning_rate": 1.5520713767574246e-06, | |
| "loss": 0.0768, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 404, | |
| "tokens_per_second_per_gpu": 380.7 | |
| }, | |
| { | |
| "epoch": 0.9288990825688074, | |
| "grad_norm": 0.02548050880432129, | |
| "learning_rate": 1.4598852214685488e-06, | |
| "loss": 0.0649, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 405, | |
| "tokens_per_second_per_gpu": 421.77 | |
| }, | |
| { | |
| "epoch": 0.9311926605504587, | |
| "grad_norm": 0.0276033915579319, | |
| "learning_rate": 1.3704807148521903e-06, | |
| "loss": 0.0722, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 406, | |
| "tokens_per_second_per_gpu": 391.2 | |
| }, | |
| { | |
| "epoch": 0.9334862385321101, | |
| "grad_norm": 0.025824090465903282, | |
| "learning_rate": 1.2838629803393342e-06, | |
| "loss": 0.0658, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 407, | |
| "tokens_per_second_per_gpu": 363.71 | |
| }, | |
| { | |
| "epoch": 0.9357798165137615, | |
| "grad_norm": 0.032180044800043106, | |
| "learning_rate": 1.2000369816616674e-06, | |
| "loss": 0.0677, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 408, | |
| "tokens_per_second_per_gpu": 490.46 | |
| }, | |
| { | |
| "epoch": 0.9380733944954128, | |
| "grad_norm": 0.03195993974804878, | |
| "learning_rate": 1.119007522567167e-06, | |
| "loss": 0.08, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 409, | |
| "tokens_per_second_per_gpu": 443.35 | |
| }, | |
| { | |
| "epoch": 0.9403669724770642, | |
| "grad_norm": 0.024462653324007988, | |
| "learning_rate": 1.0407792465447986e-06, | |
| "loss": 0.0589, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 410, | |
| "tokens_per_second_per_gpu": 511.33 | |
| }, | |
| { | |
| "epoch": 0.9426605504587156, | |
| "grad_norm": 0.02783488854765892, | |
| "learning_rate": 9.653566365584176e-07, | |
| "loss": 0.0705, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 411, | |
| "tokens_per_second_per_gpu": 407.62 | |
| }, | |
| { | |
| "epoch": 0.944954128440367, | |
| "grad_norm": 0.03449428081512451, | |
| "learning_rate": 8.927440147898702e-07, | |
| "loss": 0.0801, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 412, | |
| "tokens_per_second_per_gpu": 306.83 | |
| }, | |
| { | |
| "epoch": 0.9472477064220184, | |
| "grad_norm": 0.027761735022068024, | |
| "learning_rate": 8.229455423913013e-07, | |
| "loss": 0.0749, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 413, | |
| "tokens_per_second_per_gpu": 327.64 | |
| }, | |
| { | |
| "epoch": 0.9495412844036697, | |
| "grad_norm": 0.029755057767033577, | |
| "learning_rate": 7.559652192467126e-07, | |
| "loss": 0.0778, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 414, | |
| "tokens_per_second_per_gpu": 384.79 | |
| }, | |
| { | |
| "epoch": 0.9518348623853211, | |
| "grad_norm": 0.028378870338201523, | |
| "learning_rate": 6.918068837427128e-07, | |
| "loss": 0.0672, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 415, | |
| "tokens_per_second_per_gpu": 406.2 | |
| }, | |
| { | |
| "epoch": 0.9541284403669725, | |
| "grad_norm": 0.02773345075547695, | |
| "learning_rate": 6.304742125485874e-07, | |
| "loss": 0.06, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 416, | |
| "tokens_per_second_per_gpu": 387.18 | |
| }, | |
| { | |
| "epoch": 0.9564220183486238, | |
| "grad_norm": 0.0268245879560709, | |
| "learning_rate": 5.719707204055735e-07, | |
| "loss": 0.0621, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 417, | |
| "tokens_per_second_per_gpu": 411.81 | |
| }, | |
| { | |
| "epoch": 0.9587155963302753, | |
| "grad_norm": 0.033236313611269, | |
| "learning_rate": 5.162997599254704e-07, | |
| "loss": 0.0578, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.08, | |
| "memory/max_allocated (GiB)": 49.08, | |
| "step": 418, | |
| "tokens_per_second_per_gpu": 471.32 | |
| }, | |
| { | |
| "epoch": 0.9610091743119266, | |
| "grad_norm": 0.022961758077144623, | |
| "learning_rate": 4.634645213984934e-07, | |
| "loss": 0.0643, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 419, | |
| "tokens_per_second_per_gpu": 436.57 | |
| }, | |
| { | |
| "epoch": 0.963302752293578, | |
| "grad_norm": 0.028307458385825157, | |
| "learning_rate": 4.134680326104645e-07, | |
| "loss": 0.0691, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 420, | |
| "tokens_per_second_per_gpu": 492.18 | |
| }, | |
| { | |
| "epoch": 0.9655963302752294, | |
| "grad_norm": 0.026976363733410835, | |
| "learning_rate": 3.663131586692792e-07, | |
| "loss": 0.0655, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 421, | |
| "tokens_per_second_per_gpu": 327.53 | |
| }, | |
| { | |
| "epoch": 0.9678899082568807, | |
| "grad_norm": 0.024504244327545166, | |
| "learning_rate": 3.2200260184075406e-07, | |
| "loss": 0.0658, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 422, | |
| "tokens_per_second_per_gpu": 419.83 | |
| }, | |
| { | |
| "epoch": 0.9701834862385321, | |
| "grad_norm": 0.023533035069704056, | |
| "learning_rate": 2.805389013937454e-07, | |
| "loss": 0.0556, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 423, | |
| "tokens_per_second_per_gpu": 401.57 | |
| }, | |
| { | |
| "epoch": 0.9724770642201835, | |
| "grad_norm": 0.022774042561650276, | |
| "learning_rate": 2.419244334546267e-07, | |
| "loss": 0.0581, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.73, | |
| "memory/max_allocated (GiB)": 48.73, | |
| "step": 424, | |
| "tokens_per_second_per_gpu": 329.45 | |
| }, | |
| { | |
| "epoch": 0.9747706422018348, | |
| "grad_norm": 0.03273961320519447, | |
| "learning_rate": 2.061614108711474e-07, | |
| "loss": 0.0824, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 425, | |
| "tokens_per_second_per_gpu": 373.34 | |
| }, | |
| { | |
| "epoch": 0.9770642201834863, | |
| "grad_norm": 0.02143704518675804, | |
| "learning_rate": 1.732518830856067e-07, | |
| "loss": 0.0588, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 426, | |
| "tokens_per_second_per_gpu": 431.87 | |
| }, | |
| { | |
| "epoch": 0.9793577981651376, | |
| "grad_norm": 0.026173925027251244, | |
| "learning_rate": 1.431977360173975e-07, | |
| "loss": 0.0678, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 427, | |
| "tokens_per_second_per_gpu": 439.15 | |
| }, | |
| { | |
| "epoch": 0.981651376146789, | |
| "grad_norm": 0.026415711268782616, | |
| "learning_rate": 1.16000691954965e-07, | |
| "loss": 0.067, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 428, | |
| "tokens_per_second_per_gpu": 480.36 | |
| }, | |
| { | |
| "epoch": 0.9839449541284404, | |
| "grad_norm": 0.025120330974459648, | |
| "learning_rate": 9.1662309457069e-08, | |
| "loss": 0.0644, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 429, | |
| "tokens_per_second_per_gpu": 414.88 | |
| }, | |
| { | |
| "epoch": 0.9862385321100917, | |
| "grad_norm": 0.023429665714502335, | |
| "learning_rate": 7.018398326350539e-08, | |
| "loss": 0.0645, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 430, | |
| "tokens_per_second_per_gpu": 330.04 | |
| }, | |
| { | |
| "epoch": 0.9885321100917431, | |
| "grad_norm": 0.03130911663174629, | |
| "learning_rate": 5.15669442151423e-08, | |
| "loss": 0.0723, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 431, | |
| "tokens_per_second_per_gpu": 269.97 | |
| }, | |
| { | |
| "epoch": 0.9908256880733946, | |
| "grad_norm": 0.026494460180401802, | |
| "learning_rate": 3.581225918342646e-08, | |
| "loss": 0.0685, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 432, | |
| "tokens_per_second_per_gpu": 387.46 | |
| }, | |
| { | |
| "epoch": 0.9931192660550459, | |
| "grad_norm": 0.032140735536813736, | |
| "learning_rate": 2.292083100920994e-08, | |
| "loss": 0.0631, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 433, | |
| "tokens_per_second_per_gpu": 427.4 | |
| }, | |
| { | |
| "epoch": 0.9954128440366973, | |
| "grad_norm": 0.025799578055739403, | |
| "learning_rate": 1.2893398451024886e-08, | |
| "loss": 0.0695, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 434, | |
| "tokens_per_second_per_gpu": 461.05 | |
| }, | |
| { | |
| "epoch": 0.9977064220183486, | |
| "grad_norm": 0.031855881214141846, | |
| "learning_rate": 5.730536142745102e-09, | |
| "loss": 0.0818, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 435, | |
| "tokens_per_second_per_gpu": 415.34 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.026343608275055885, | |
| "learning_rate": 1.432654560679092e-09, | |
| "loss": 0.0674, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 436, | |
| "tokens_per_second_per_gpu": 342.89 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 436, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 60, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.337198826144924e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |