| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.27522935779816515, | |
| "eval_steps": 500, | |
| "global_step": 120, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0022935779816513763, | |
| "grad_norm": 0.12869106233119965, | |
| "learning_rate": 0.0, | |
| "loss": 0.1978, | |
| "memory/device_reserved (GiB)": 50.77, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 1, | |
| "tokens_per_second_per_gpu": 354.96 | |
| }, | |
| { | |
| "epoch": 0.0045871559633027525, | |
| "grad_norm": 0.15667210519313812, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 0.2353, | |
| "memory/device_reserved (GiB)": 50.77, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 2, | |
| "tokens_per_second_per_gpu": 406.37 | |
| }, | |
| { | |
| "epoch": 0.006880733944954129, | |
| "grad_norm": 0.2217973917722702, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 0.2243, | |
| "memory/device_reserved (GiB)": 50.87, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 3, | |
| "tokens_per_second_per_gpu": 371.18 | |
| }, | |
| { | |
| "epoch": 0.009174311926605505, | |
| "grad_norm": 0.15948686003684998, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 0.2392, | |
| "memory/device_reserved (GiB)": 50.87, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 4, | |
| "tokens_per_second_per_gpu": 414.48 | |
| }, | |
| { | |
| "epoch": 0.011467889908256881, | |
| "grad_norm": 0.153566375374794, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 0.2182, | |
| "memory/device_reserved (GiB)": 50.87, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 5, | |
| "tokens_per_second_per_gpu": 369.22 | |
| }, | |
| { | |
| "epoch": 0.013761467889908258, | |
| "grad_norm": 0.1521972268819809, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 0.2112, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 6, | |
| "tokens_per_second_per_gpu": 429.31 | |
| }, | |
| { | |
| "epoch": 0.016055045871559634, | |
| "grad_norm": 0.168710395693779, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 0.226, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 7, | |
| "tokens_per_second_per_gpu": 417.78 | |
| }, | |
| { | |
| "epoch": 0.01834862385321101, | |
| "grad_norm": 0.13864850997924805, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.1884, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 8, | |
| "tokens_per_second_per_gpu": 439.56 | |
| }, | |
| { | |
| "epoch": 0.020642201834862386, | |
| "grad_norm": 0.15227903425693512, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 0.1996, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 9, | |
| "tokens_per_second_per_gpu": 411.33 | |
| }, | |
| { | |
| "epoch": 0.022935779816513763, | |
| "grad_norm": 0.13421630859375, | |
| "learning_rate": 4.2857142857142856e-05, | |
| "loss": 0.1599, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 10, | |
| "tokens_per_second_per_gpu": 496.3 | |
| }, | |
| { | |
| "epoch": 0.02522935779816514, | |
| "grad_norm": 0.14955134689807892, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 0.1735, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 11, | |
| "tokens_per_second_per_gpu": 372.95 | |
| }, | |
| { | |
| "epoch": 0.027522935779816515, | |
| "grad_norm": 0.1432778388261795, | |
| "learning_rate": 5.2380952380952384e-05, | |
| "loss": 0.1515, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 12, | |
| "tokens_per_second_per_gpu": 398.65 | |
| }, | |
| { | |
| "epoch": 0.02981651376146789, | |
| "grad_norm": 0.14163611829280853, | |
| "learning_rate": 5.714285714285714e-05, | |
| "loss": 0.1517, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 13, | |
| "tokens_per_second_per_gpu": 440.5 | |
| }, | |
| { | |
| "epoch": 0.03211009174311927, | |
| "grad_norm": 0.15477906167507172, | |
| "learning_rate": 6.19047619047619e-05, | |
| "loss": 0.1444, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 14, | |
| "tokens_per_second_per_gpu": 385.32 | |
| }, | |
| { | |
| "epoch": 0.034403669724770644, | |
| "grad_norm": 0.1055532768368721, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.1292, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 15, | |
| "tokens_per_second_per_gpu": 453.02 | |
| }, | |
| { | |
| "epoch": 0.03669724770642202, | |
| "grad_norm": 0.10180933028459549, | |
| "learning_rate": 7.142857142857143e-05, | |
| "loss": 0.1208, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 16, | |
| "tokens_per_second_per_gpu": 474.27 | |
| }, | |
| { | |
| "epoch": 0.0389908256880734, | |
| "grad_norm": 0.07999677956104279, | |
| "learning_rate": 7.619047619047618e-05, | |
| "loss": 0.132, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 17, | |
| "tokens_per_second_per_gpu": 382.05 | |
| }, | |
| { | |
| "epoch": 0.04128440366972477, | |
| "grad_norm": 0.09194924682378769, | |
| "learning_rate": 8.095238095238096e-05, | |
| "loss": 0.1067, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 18, | |
| "tokens_per_second_per_gpu": 398.61 | |
| }, | |
| { | |
| "epoch": 0.04357798165137615, | |
| "grad_norm": 0.0931428000330925, | |
| "learning_rate": 8.571428571428571e-05, | |
| "loss": 0.1088, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 19, | |
| "tokens_per_second_per_gpu": 447.07 | |
| }, | |
| { | |
| "epoch": 0.045871559633027525, | |
| "grad_norm": 0.06202042102813721, | |
| "learning_rate": 9.047619047619048e-05, | |
| "loss": 0.0962, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 20, | |
| "tokens_per_second_per_gpu": 382.57 | |
| }, | |
| { | |
| "epoch": 0.0481651376146789, | |
| "grad_norm": 0.04220607504248619, | |
| "learning_rate": 9.523809523809524e-05, | |
| "loss": 0.0963, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 21, | |
| "tokens_per_second_per_gpu": 423.29 | |
| }, | |
| { | |
| "epoch": 0.05045871559633028, | |
| "grad_norm": 0.050066106021404266, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1032, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 22, | |
| "tokens_per_second_per_gpu": 381.35 | |
| }, | |
| { | |
| "epoch": 0.052752293577981654, | |
| "grad_norm": 0.0557384118437767, | |
| "learning_rate": 9.999856734543933e-05, | |
| "loss": 0.1025, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 23, | |
| "tokens_per_second_per_gpu": 393.62 | |
| }, | |
| { | |
| "epoch": 0.05504587155963303, | |
| "grad_norm": 0.04612402245402336, | |
| "learning_rate": 9.999426946385727e-05, | |
| "loss": 0.0985, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 24, | |
| "tokens_per_second_per_gpu": 515.46 | |
| }, | |
| { | |
| "epoch": 0.05733944954128441, | |
| "grad_norm": 0.09721734374761581, | |
| "learning_rate": 9.998710660154898e-05, | |
| "loss": 0.1062, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 25, | |
| "tokens_per_second_per_gpu": 398.15 | |
| }, | |
| { | |
| "epoch": 0.05963302752293578, | |
| "grad_norm": 0.036745935678482056, | |
| "learning_rate": 9.997707916899079e-05, | |
| "loss": 0.1045, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 26, | |
| "tokens_per_second_per_gpu": 422.42 | |
| }, | |
| { | |
| "epoch": 0.06192660550458716, | |
| "grad_norm": 0.04298936203122139, | |
| "learning_rate": 9.996418774081658e-05, | |
| "loss": 0.0923, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 27, | |
| "tokens_per_second_per_gpu": 440.87 | |
| }, | |
| { | |
| "epoch": 0.06422018348623854, | |
| "grad_norm": 0.033536747097969055, | |
| "learning_rate": 9.994843305578486e-05, | |
| "loss": 0.096, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 28, | |
| "tokens_per_second_per_gpu": 370.28 | |
| }, | |
| { | |
| "epoch": 0.06651376146788991, | |
| "grad_norm": 0.03256046772003174, | |
| "learning_rate": 9.99298160167365e-05, | |
| "loss": 0.0832, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 29, | |
| "tokens_per_second_per_gpu": 357.19 | |
| }, | |
| { | |
| "epoch": 0.06880733944954129, | |
| "grad_norm": 0.042709868401288986, | |
| "learning_rate": 9.990833769054293e-05, | |
| "loss": 0.086, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 30, | |
| "tokens_per_second_per_gpu": 441.89 | |
| }, | |
| { | |
| "epoch": 0.07110091743119266, | |
| "grad_norm": 0.04347776621580124, | |
| "learning_rate": 9.988399930804504e-05, | |
| "loss": 0.1, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 31, | |
| "tokens_per_second_per_gpu": 348.66 | |
| }, | |
| { | |
| "epoch": 0.07339449541284404, | |
| "grad_norm": 0.030414681881666183, | |
| "learning_rate": 9.985680226398261e-05, | |
| "loss": 0.0811, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 32, | |
| "tokens_per_second_per_gpu": 435.28 | |
| }, | |
| { | |
| "epoch": 0.07568807339449542, | |
| "grad_norm": 0.034023743122816086, | |
| "learning_rate": 9.98267481169144e-05, | |
| "loss": 0.0743, | |
| "memory/device_reserved (GiB)": 50.93, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 33, | |
| "tokens_per_second_per_gpu": 482.51 | |
| }, | |
| { | |
| "epoch": 0.0779816513761468, | |
| "grad_norm": 0.03136487305164337, | |
| "learning_rate": 9.979383858912885e-05, | |
| "loss": 0.0739, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.08, | |
| "memory/max_allocated (GiB)": 49.08, | |
| "step": 34, | |
| "tokens_per_second_per_gpu": 496.59 | |
| }, | |
| { | |
| "epoch": 0.08027522935779817, | |
| "grad_norm": 0.028108298778533936, | |
| "learning_rate": 9.975807556654537e-05, | |
| "loss": 0.077, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 35, | |
| "tokens_per_second_per_gpu": 349.1 | |
| }, | |
| { | |
| "epoch": 0.08256880733944955, | |
| "grad_norm": 0.028020795434713364, | |
| "learning_rate": 9.971946109860626e-05, | |
| "loss": 0.0775, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 36, | |
| "tokens_per_second_per_gpu": 351.02 | |
| }, | |
| { | |
| "epoch": 0.08486238532110092, | |
| "grad_norm": 0.028756650164723396, | |
| "learning_rate": 9.967799739815925e-05, | |
| "loss": 0.0788, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 37, | |
| "tokens_per_second_per_gpu": 534.52 | |
| }, | |
| { | |
| "epoch": 0.0871559633027523, | |
| "grad_norm": 0.02806459739804268, | |
| "learning_rate": 9.963368684133072e-05, | |
| "loss": 0.0809, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 38, | |
| "tokens_per_second_per_gpu": 367.94 | |
| }, | |
| { | |
| "epoch": 0.08944954128440367, | |
| "grad_norm": 0.02387731708586216, | |
| "learning_rate": 9.958653196738954e-05, | |
| "loss": 0.0642, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 39, | |
| "tokens_per_second_per_gpu": 466.74 | |
| }, | |
| { | |
| "epoch": 0.09174311926605505, | |
| "grad_norm": 0.027889851480722427, | |
| "learning_rate": 9.953653547860151e-05, | |
| "loss": 0.0904, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 40, | |
| "tokens_per_second_per_gpu": 371.51 | |
| }, | |
| { | |
| "epoch": 0.09403669724770643, | |
| "grad_norm": 0.031659577041864395, | |
| "learning_rate": 9.948370024007454e-05, | |
| "loss": 0.081, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 41, | |
| "tokens_per_second_per_gpu": 479.04 | |
| }, | |
| { | |
| "epoch": 0.0963302752293578, | |
| "grad_norm": 0.03186093270778656, | |
| "learning_rate": 9.942802927959443e-05, | |
| "loss": 0.0881, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 42, | |
| "tokens_per_second_per_gpu": 364.73 | |
| }, | |
| { | |
| "epoch": 0.09862385321100918, | |
| "grad_norm": 0.0313677079975605, | |
| "learning_rate": 9.936952578745142e-05, | |
| "loss": 0.0808, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 43, | |
| "tokens_per_second_per_gpu": 418.0 | |
| }, | |
| { | |
| "epoch": 0.10091743119266056, | |
| "grad_norm": 0.0264989472925663, | |
| "learning_rate": 9.93081931162573e-05, | |
| "loss": 0.0664, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 44, | |
| "tokens_per_second_per_gpu": 439.24 | |
| }, | |
| { | |
| "epoch": 0.10321100917431193, | |
| "grad_norm": 0.026272334158420563, | |
| "learning_rate": 9.92440347807533e-05, | |
| "loss": 0.0683, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 45, | |
| "tokens_per_second_per_gpu": 482.81 | |
| }, | |
| { | |
| "epoch": 0.10550458715596331, | |
| "grad_norm": 0.029066840186715126, | |
| "learning_rate": 9.91770544576087e-05, | |
| "loss": 0.0737, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 46, | |
| "tokens_per_second_per_gpu": 389.87 | |
| }, | |
| { | |
| "epoch": 0.10779816513761468, | |
| "grad_norm": 0.024542706087231636, | |
| "learning_rate": 9.910725598521013e-05, | |
| "loss": 0.0737, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 47, | |
| "tokens_per_second_per_gpu": 473.12 | |
| }, | |
| { | |
| "epoch": 0.11009174311926606, | |
| "grad_norm": 0.042941153049468994, | |
| "learning_rate": 9.90346433634416e-05, | |
| "loss": 0.0951, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 48, | |
| "tokens_per_second_per_gpu": 325.12 | |
| }, | |
| { | |
| "epoch": 0.11238532110091744, | |
| "grad_norm": 0.029044413939118385, | |
| "learning_rate": 9.89592207534552e-05, | |
| "loss": 0.0745, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.73, | |
| "memory/max_allocated (GiB)": 48.73, | |
| "step": 49, | |
| "tokens_per_second_per_gpu": 315.62 | |
| }, | |
| { | |
| "epoch": 0.11467889908256881, | |
| "grad_norm": 0.028920788317918777, | |
| "learning_rate": 9.888099247743283e-05, | |
| "loss": 0.0818, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 50, | |
| "tokens_per_second_per_gpu": 441.3 | |
| }, | |
| { | |
| "epoch": 0.11697247706422019, | |
| "grad_norm": 0.026095205917954445, | |
| "learning_rate": 9.879996301833833e-05, | |
| "loss": 0.0688, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 51, | |
| "tokens_per_second_per_gpu": 386.22 | |
| }, | |
| { | |
| "epoch": 0.11926605504587157, | |
| "grad_norm": 0.024823926389217377, | |
| "learning_rate": 9.871613701966067e-05, | |
| "loss": 0.0701, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 52, | |
| "tokens_per_second_per_gpu": 511.32 | |
| }, | |
| { | |
| "epoch": 0.12155963302752294, | |
| "grad_norm": 0.036093298345804214, | |
| "learning_rate": 9.862951928514782e-05, | |
| "loss": 0.0823, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 53, | |
| "tokens_per_second_per_gpu": 323.2 | |
| }, | |
| { | |
| "epoch": 0.12385321100917432, | |
| "grad_norm": 0.03257686272263527, | |
| "learning_rate": 9.854011477853146e-05, | |
| "loss": 0.0769, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 54, | |
| "tokens_per_second_per_gpu": 447.62 | |
| }, | |
| { | |
| "epoch": 0.12614678899082568, | |
| "grad_norm": 0.03413158655166626, | |
| "learning_rate": 9.844792862324258e-05, | |
| "loss": 0.0728, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 55, | |
| "tokens_per_second_per_gpu": 451.05 | |
| }, | |
| { | |
| "epoch": 0.12844036697247707, | |
| "grad_norm": 0.02947932481765747, | |
| "learning_rate": 9.835296610211779e-05, | |
| "loss": 0.0713, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 56, | |
| "tokens_per_second_per_gpu": 457.44 | |
| }, | |
| { | |
| "epoch": 0.13073394495412843, | |
| "grad_norm": 0.0220651775598526, | |
| "learning_rate": 9.825523265709666e-05, | |
| "loss": 0.0607, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 57, | |
| "tokens_per_second_per_gpu": 456.49 | |
| }, | |
| { | |
| "epoch": 0.13302752293577982, | |
| "grad_norm": 0.026394842192530632, | |
| "learning_rate": 9.815473388890983e-05, | |
| "loss": 0.0716, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 58, | |
| "tokens_per_second_per_gpu": 393.95 | |
| }, | |
| { | |
| "epoch": 0.1353211009174312, | |
| "grad_norm": 0.027936838567256927, | |
| "learning_rate": 9.805147555675805e-05, | |
| "loss": 0.0738, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 59, | |
| "tokens_per_second_per_gpu": 464.83 | |
| }, | |
| { | |
| "epoch": 0.13761467889908258, | |
| "grad_norm": 0.023982539772987366, | |
| "learning_rate": 9.794546357798208e-05, | |
| "loss": 0.0608, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 60, | |
| "tokens_per_second_per_gpu": 450.66 | |
| }, | |
| { | |
| "epoch": 0.13990825688073394, | |
| "grad_norm": 0.027479754760861397, | |
| "learning_rate": 9.783670402772379e-05, | |
| "loss": 0.0672, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 61, | |
| "tokens_per_second_per_gpu": 455.94 | |
| }, | |
| { | |
| "epoch": 0.14220183486238533, | |
| "grad_norm": 0.02617599070072174, | |
| "learning_rate": 9.772520313857775e-05, | |
| "loss": 0.0804, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 62, | |
| "tokens_per_second_per_gpu": 394.85 | |
| }, | |
| { | |
| "epoch": 0.1444954128440367, | |
| "grad_norm": 0.030884992331266403, | |
| "learning_rate": 9.761096730023432e-05, | |
| "loss": 0.0768, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 63, | |
| "tokens_per_second_per_gpu": 446.63 | |
| }, | |
| { | |
| "epoch": 0.14678899082568808, | |
| "grad_norm": 0.027579287067055702, | |
| "learning_rate": 9.749400305911322e-05, | |
| "loss": 0.0659, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 64, | |
| "tokens_per_second_per_gpu": 484.34 | |
| }, | |
| { | |
| "epoch": 0.14908256880733944, | |
| "grad_norm": 0.030303625389933586, | |
| "learning_rate": 9.737431711798864e-05, | |
| "loss": 0.0645, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 65, | |
| "tokens_per_second_per_gpu": 437.07 | |
| }, | |
| { | |
| "epoch": 0.15137614678899083, | |
| "grad_norm": 0.027446158230304718, | |
| "learning_rate": 9.725191633560491e-05, | |
| "loss": 0.08, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 66, | |
| "tokens_per_second_per_gpu": 411.5 | |
| }, | |
| { | |
| "epoch": 0.1536697247706422, | |
| "grad_norm": 0.03177177160978317, | |
| "learning_rate": 9.712680772628364e-05, | |
| "loss": 0.0801, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 67, | |
| "tokens_per_second_per_gpu": 429.18 | |
| }, | |
| { | |
| "epoch": 0.1559633027522936, | |
| "grad_norm": 0.0288909412920475, | |
| "learning_rate": 9.69989984595216e-05, | |
| "loss": 0.0707, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 68, | |
| "tokens_per_second_per_gpu": 408.55 | |
| }, | |
| { | |
| "epoch": 0.15825688073394495, | |
| "grad_norm": 0.02751251310110092, | |
| "learning_rate": 9.686849585957994e-05, | |
| "loss": 0.0736, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 69, | |
| "tokens_per_second_per_gpu": 420.0 | |
| }, | |
| { | |
| "epoch": 0.16055045871559634, | |
| "grad_norm": 0.023428168147802353, | |
| "learning_rate": 9.673530740506447e-05, | |
| "loss": 0.0648, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 70, | |
| "tokens_per_second_per_gpu": 512.59 | |
| }, | |
| { | |
| "epoch": 0.1628440366972477, | |
| "grad_norm": 0.031534772366285324, | |
| "learning_rate": 9.659944072849707e-05, | |
| "loss": 0.0818, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 71, | |
| "tokens_per_second_per_gpu": 456.9 | |
| }, | |
| { | |
| "epoch": 0.1651376146788991, | |
| "grad_norm": 0.027208171784877777, | |
| "learning_rate": 9.646090361587827e-05, | |
| "loss": 0.0709, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 72, | |
| "tokens_per_second_per_gpu": 378.48 | |
| }, | |
| { | |
| "epoch": 0.16743119266055045, | |
| "grad_norm": 0.02961639314889908, | |
| "learning_rate": 9.631970400624113e-05, | |
| "loss": 0.0764, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 73, | |
| "tokens_per_second_per_gpu": 316.38 | |
| }, | |
| { | |
| "epoch": 0.16972477064220184, | |
| "grad_norm": 0.027367761358618736, | |
| "learning_rate": 9.617584999119625e-05, | |
| "loss": 0.0672, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 74, | |
| "tokens_per_second_per_gpu": 402.44 | |
| }, | |
| { | |
| "epoch": 0.1720183486238532, | |
| "grad_norm": 0.030167503282427788, | |
| "learning_rate": 9.602934981446803e-05, | |
| "loss": 0.0743, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 75, | |
| "tokens_per_second_per_gpu": 531.29 | |
| }, | |
| { | |
| "epoch": 0.1743119266055046, | |
| "grad_norm": 0.0387263149023056, | |
| "learning_rate": 9.588021187142235e-05, | |
| "loss": 0.083, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 76, | |
| "tokens_per_second_per_gpu": 424.59 | |
| }, | |
| { | |
| "epoch": 0.17660550458715596, | |
| "grad_norm": 0.027617793530225754, | |
| "learning_rate": 9.572844470858537e-05, | |
| "loss": 0.0769, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 77, | |
| "tokens_per_second_per_gpu": 461.9 | |
| }, | |
| { | |
| "epoch": 0.17889908256880735, | |
| "grad_norm": 0.029771512374281883, | |
| "learning_rate": 9.557405702315381e-05, | |
| "loss": 0.0658, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 78, | |
| "tokens_per_second_per_gpu": 475.77 | |
| }, | |
| { | |
| "epoch": 0.1811926605504587, | |
| "grad_norm": 0.029358675703406334, | |
| "learning_rate": 9.541705766249655e-05, | |
| "loss": 0.066, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 79, | |
| "tokens_per_second_per_gpu": 489.33 | |
| }, | |
| { | |
| "epoch": 0.1834862385321101, | |
| "grad_norm": 0.023111771792173386, | |
| "learning_rate": 9.525745562364756e-05, | |
| "loss": 0.066, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 80, | |
| "tokens_per_second_per_gpu": 382.84 | |
| }, | |
| { | |
| "epoch": 0.18577981651376146, | |
| "grad_norm": 0.029448291286826134, | |
| "learning_rate": 9.509526005279044e-05, | |
| "loss": 0.0608, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 81, | |
| "tokens_per_second_per_gpu": 415.81 | |
| }, | |
| { | |
| "epoch": 0.18807339449541285, | |
| "grad_norm": 0.02794116735458374, | |
| "learning_rate": 9.493048024473412e-05, | |
| "loss": 0.0736, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 82, | |
| "tokens_per_second_per_gpu": 400.02 | |
| }, | |
| { | |
| "epoch": 0.19036697247706422, | |
| "grad_norm": 0.04534873738884926, | |
| "learning_rate": 9.476312564238034e-05, | |
| "loss": 0.0673, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 83, | |
| "tokens_per_second_per_gpu": 369.1 | |
| }, | |
| { | |
| "epoch": 0.1926605504587156, | |
| "grad_norm": 0.026540853083133698, | |
| "learning_rate": 9.459320583618252e-05, | |
| "loss": 0.0558, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 84, | |
| "tokens_per_second_per_gpu": 611.61 | |
| }, | |
| { | |
| "epoch": 0.19495412844036697, | |
| "grad_norm": 0.03129403293132782, | |
| "learning_rate": 9.442073056359604e-05, | |
| "loss": 0.0741, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 85, | |
| "tokens_per_second_per_gpu": 492.16 | |
| }, | |
| { | |
| "epoch": 0.19724770642201836, | |
| "grad_norm": 0.027526071295142174, | |
| "learning_rate": 9.424570970852034e-05, | |
| "loss": 0.0733, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 86, | |
| "tokens_per_second_per_gpu": 427.76 | |
| }, | |
| { | |
| "epoch": 0.19954128440366972, | |
| "grad_norm": 0.025468798354268074, | |
| "learning_rate": 9.406815330073244e-05, | |
| "loss": 0.0613, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 87, | |
| "tokens_per_second_per_gpu": 462.82 | |
| }, | |
| { | |
| "epoch": 0.2018348623853211, | |
| "grad_norm": 0.029043635353446007, | |
| "learning_rate": 9.388807151531229e-05, | |
| "loss": 0.0758, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 88, | |
| "tokens_per_second_per_gpu": 353.91 | |
| }, | |
| { | |
| "epoch": 0.20412844036697247, | |
| "grad_norm": 0.03196391835808754, | |
| "learning_rate": 9.37054746720595e-05, | |
| "loss": 0.0678, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 89, | |
| "tokens_per_second_per_gpu": 411.71 | |
| }, | |
| { | |
| "epoch": 0.20642201834862386, | |
| "grad_norm": 0.033272091299295425, | |
| "learning_rate": 9.352037323490208e-05, | |
| "loss": 0.0722, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 90, | |
| "tokens_per_second_per_gpu": 398.81 | |
| }, | |
| { | |
| "epoch": 0.20871559633027523, | |
| "grad_norm": 0.03096090629696846, | |
| "learning_rate": 9.333277781129678e-05, | |
| "loss": 0.0809, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 91, | |
| "tokens_per_second_per_gpu": 393.81 | |
| }, | |
| { | |
| "epoch": 0.21100917431192662, | |
| "grad_norm": 0.026267440989613533, | |
| "learning_rate": 9.314269915162114e-05, | |
| "loss": 0.0604, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 92, | |
| "tokens_per_second_per_gpu": 453.78 | |
| }, | |
| { | |
| "epoch": 0.21330275229357798, | |
| "grad_norm": 0.02608361840248108, | |
| "learning_rate": 9.295014814855753e-05, | |
| "loss": 0.0663, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 93, | |
| "tokens_per_second_per_gpu": 430.47 | |
| }, | |
| { | |
| "epoch": 0.21559633027522937, | |
| "grad_norm": 0.024829065427184105, | |
| "learning_rate": 9.275513583646884e-05, | |
| "loss": 0.0598, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 94, | |
| "tokens_per_second_per_gpu": 384.01 | |
| }, | |
| { | |
| "epoch": 0.21788990825688073, | |
| "grad_norm": 0.03385532647371292, | |
| "learning_rate": 9.255767339076622e-05, | |
| "loss": 0.0719, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 95, | |
| "tokens_per_second_per_gpu": 440.35 | |
| }, | |
| { | |
| "epoch": 0.22018348623853212, | |
| "grad_norm": 0.029608217999339104, | |
| "learning_rate": 9.23577721272686e-05, | |
| "loss": 0.094, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.04, | |
| "memory/max_allocated (GiB)": 49.04, | |
| "step": 96, | |
| "tokens_per_second_per_gpu": 485.56 | |
| }, | |
| { | |
| "epoch": 0.22247706422018348, | |
| "grad_norm": 0.02693762816488743, | |
| "learning_rate": 9.215544350155422e-05, | |
| "loss": 0.0755, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 97, | |
| "tokens_per_second_per_gpu": 432.16 | |
| }, | |
| { | |
| "epoch": 0.22477064220183487, | |
| "grad_norm": 0.02771424688398838, | |
| "learning_rate": 9.195069910830427e-05, | |
| "loss": 0.0692, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 98, | |
| "tokens_per_second_per_gpu": 412.93 | |
| }, | |
| { | |
| "epoch": 0.22706422018348624, | |
| "grad_norm": 0.02276022732257843, | |
| "learning_rate": 9.174355068063828e-05, | |
| "loss": 0.0637, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 99, | |
| "tokens_per_second_per_gpu": 418.24 | |
| }, | |
| { | |
| "epoch": 0.22935779816513763, | |
| "grad_norm": 0.026155246421694756, | |
| "learning_rate": 9.15340100894418e-05, | |
| "loss": 0.0698, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 100, | |
| "tokens_per_second_per_gpu": 403.6 | |
| }, | |
| { | |
| "epoch": 0.231651376146789, | |
| "grad_norm": 0.022778436541557312, | |
| "learning_rate": 9.132208934268622e-05, | |
| "loss": 0.0654, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 101, | |
| "tokens_per_second_per_gpu": 491.32 | |
| }, | |
| { | |
| "epoch": 0.23394495412844038, | |
| "grad_norm": 0.04701945558190346, | |
| "learning_rate": 9.110780058474052e-05, | |
| "loss": 0.0741, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 102, | |
| "tokens_per_second_per_gpu": 444.03 | |
| }, | |
| { | |
| "epoch": 0.23623853211009174, | |
| "grad_norm": 0.030211661010980606, | |
| "learning_rate": 9.08911560956753e-05, | |
| "loss": 0.0789, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 103, | |
| "tokens_per_second_per_gpu": 514.87 | |
| }, | |
| { | |
| "epoch": 0.23853211009174313, | |
| "grad_norm": 0.026159459725022316, | |
| "learning_rate": 9.067216829055922e-05, | |
| "loss": 0.0637, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 104, | |
| "tokens_per_second_per_gpu": 446.47 | |
| }, | |
| { | |
| "epoch": 0.2408256880733945, | |
| "grad_norm": 0.02918146923184395, | |
| "learning_rate": 9.045084971874738e-05, | |
| "loss": 0.0727, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 105, | |
| "tokens_per_second_per_gpu": 425.37 | |
| }, | |
| { | |
| "epoch": 0.24311926605504589, | |
| "grad_norm": 0.03170175105333328, | |
| "learning_rate": 9.022721306316222e-05, | |
| "loss": 0.0857, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.85, | |
| "memory/max_allocated (GiB)": 48.85, | |
| "step": 106, | |
| "tokens_per_second_per_gpu": 301.79 | |
| }, | |
| { | |
| "epoch": 0.24541284403669725, | |
| "grad_norm": 0.032674651592969894, | |
| "learning_rate": 9.000127113956674e-05, | |
| "loss": 0.0795, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.77, | |
| "memory/max_allocated (GiB)": 48.77, | |
| "step": 107, | |
| "tokens_per_second_per_gpu": 338.41 | |
| }, | |
| { | |
| "epoch": 0.24770642201834864, | |
| "grad_norm": 0.026492780074477196, | |
| "learning_rate": 8.977303689583e-05, | |
| "loss": 0.0775, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 108, | |
| "tokens_per_second_per_gpu": 383.35 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.0290480125695467, | |
| "learning_rate": 8.954252341118523e-05, | |
| "loss": 0.076, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 109, | |
| "tokens_per_second_per_gpu": 382.78 | |
| }, | |
| { | |
| "epoch": 0.25229357798165136, | |
| "grad_norm": 0.030473977327346802, | |
| "learning_rate": 8.930974389548023e-05, | |
| "loss": 0.0761, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.93, | |
| "memory/max_allocated (GiB)": 48.93, | |
| "step": 110, | |
| "tokens_per_second_per_gpu": 476.56 | |
| }, | |
| { | |
| "epoch": 0.2545871559633027, | |
| "grad_norm": 0.02930077351629734, | |
| "learning_rate": 8.90747116884204e-05, | |
| "loss": 0.0691, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 111, | |
| "tokens_per_second_per_gpu": 441.2 | |
| }, | |
| { | |
| "epoch": 0.25688073394495414, | |
| "grad_norm": 0.02884151227772236, | |
| "learning_rate": 8.883744025880428e-05, | |
| "loss": 0.0806, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 112, | |
| "tokens_per_second_per_gpu": 406.96 | |
| }, | |
| { | |
| "epoch": 0.2591743119266055, | |
| "grad_norm": 0.02618175558745861, | |
| "learning_rate": 8.859794320375168e-05, | |
| "loss": 0.0677, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 113, | |
| "tokens_per_second_per_gpu": 430.04 | |
| }, | |
| { | |
| "epoch": 0.26146788990825687, | |
| "grad_norm": 0.026963548734784126, | |
| "learning_rate": 8.835623424792452e-05, | |
| "loss": 0.0694, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.89, | |
| "memory/max_allocated (GiB)": 48.89, | |
| "step": 114, | |
| "tokens_per_second_per_gpu": 351.9 | |
| }, | |
| { | |
| "epoch": 0.26376146788990823, | |
| "grad_norm": 0.021544624119997025, | |
| "learning_rate": 8.811232724274035e-05, | |
| "loss": 0.0613, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 115, | |
| "tokens_per_second_per_gpu": 480.22 | |
| }, | |
| { | |
| "epoch": 0.26605504587155965, | |
| "grad_norm": 0.03840009495615959, | |
| "learning_rate": 8.786623616557847e-05, | |
| "loss": 0.0723, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 116, | |
| "tokens_per_second_per_gpu": 433.18 | |
| }, | |
| { | |
| "epoch": 0.268348623853211, | |
| "grad_norm": 0.022571468725800514, | |
| "learning_rate": 8.761797511897906e-05, | |
| "loss": 0.065, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 49.0, | |
| "memory/max_allocated (GiB)": 49.0, | |
| "step": 117, | |
| "tokens_per_second_per_gpu": 421.92 | |
| }, | |
| { | |
| "epoch": 0.2706422018348624, | |
| "grad_norm": 0.02688576467335224, | |
| "learning_rate": 8.736755832983497e-05, | |
| "loss": 0.0772, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 118, | |
| "tokens_per_second_per_gpu": 354.3 | |
| }, | |
| { | |
| "epoch": 0.27293577981651373, | |
| "grad_norm": 0.025858785957098007, | |
| "learning_rate": 8.711500014857634e-05, | |
| "loss": 0.0745, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.81, | |
| "memory/max_allocated (GiB)": 48.81, | |
| "step": 119, | |
| "tokens_per_second_per_gpu": 365.46 | |
| }, | |
| { | |
| "epoch": 0.27522935779816515, | |
| "grad_norm": 0.02718079835176468, | |
| "learning_rate": 8.686031504834843e-05, | |
| "loss": 0.0759, | |
| "memory/device_reserved (GiB)": 50.97, | |
| "memory/max_active (GiB)": 48.97, | |
| "memory/max_allocated (GiB)": 48.97, | |
| "step": 120, | |
| "tokens_per_second_per_gpu": 426.06 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 436, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 60, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.4689538053609882e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |