| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.667209997283347, | |
| "eval_steps": 460, | |
| "global_step": 614, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_loss": 1.9877382516860962, | |
| "eval_runtime": 2619.1512, | |
| "eval_samples_per_second": 0.573, | |
| "eval_steps_per_second": 0.143, | |
| "memory/device_reserved (GiB)": 60.2, | |
| "memory/max_active (GiB)": 58.24, | |
| "memory/max_allocated (GiB)": 58.24, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.0010866612333604998, | |
| "grad_norm": 0.42468029260635376, | |
| "learning_rate": 0.0, | |
| "loss": 2.0554, | |
| "memory/device_reserved (GiB)": 76.43, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 1, | |
| "tokens_per_second_per_gpu": 1071.8 | |
| }, | |
| { | |
| "epoch": 0.0021733224667209996, | |
| "grad_norm": 0.35043102502822876, | |
| "learning_rate": 2.176278563656148e-07, | |
| "loss": 1.9057, | |
| "memory/device_reserved (GiB)": 76.43, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 2, | |
| "tokens_per_second_per_gpu": 14.62 | |
| }, | |
| { | |
| "epoch": 0.0032599837000814994, | |
| "grad_norm": 0.36562222242355347, | |
| "learning_rate": 4.352557127312296e-07, | |
| "loss": 1.967, | |
| "memory/device_reserved (GiB)": 76.43, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 3, | |
| "tokens_per_second_per_gpu": 10.92 | |
| }, | |
| { | |
| "epoch": 0.004346644933441999, | |
| "grad_norm": 0.43097689747810364, | |
| "learning_rate": 6.528835690968445e-07, | |
| "loss": 2.0106, | |
| "memory/device_reserved (GiB)": 76.43, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 4, | |
| "tokens_per_second_per_gpu": 13.36 | |
| }, | |
| { | |
| "epoch": 0.0054333061668024995, | |
| "grad_norm": 0.5135467052459717, | |
| "learning_rate": 8.705114254624592e-07, | |
| "loss": 2.0778, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.57, | |
| "memory/max_allocated (GiB)": 72.57, | |
| "step": 5, | |
| "tokens_per_second_per_gpu": 9.5 | |
| }, | |
| { | |
| "epoch": 0.006519967400162999, | |
| "grad_norm": 0.49276694655418396, | |
| "learning_rate": 1.0881392818280741e-06, | |
| "loss": 1.9529, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 6, | |
| "tokens_per_second_per_gpu": 7.92 | |
| }, | |
| { | |
| "epoch": 0.007606628633523499, | |
| "grad_norm": 0.37937992811203003, | |
| "learning_rate": 1.305767138193689e-06, | |
| "loss": 1.9503, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 7, | |
| "tokens_per_second_per_gpu": 9.08 | |
| }, | |
| { | |
| "epoch": 0.008693289866883998, | |
| "grad_norm": 0.3791826665401459, | |
| "learning_rate": 1.5233949945593036e-06, | |
| "loss": 2.0101, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 8, | |
| "tokens_per_second_per_gpu": 13.06 | |
| }, | |
| { | |
| "epoch": 0.009779951100244499, | |
| "grad_norm": 0.3549179136753082, | |
| "learning_rate": 1.7410228509249185e-06, | |
| "loss": 2.0314, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 9, | |
| "tokens_per_second_per_gpu": 9.06 | |
| }, | |
| { | |
| "epoch": 0.010866612333604999, | |
| "grad_norm": 0.45170578360557556, | |
| "learning_rate": 1.958650707290533e-06, | |
| "loss": 2.1162, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 10, | |
| "tokens_per_second_per_gpu": 8.01 | |
| }, | |
| { | |
| "epoch": 0.0119532735669655, | |
| "grad_norm": 0.41091838479042053, | |
| "learning_rate": 2.1762785636561482e-06, | |
| "loss": 2.0546, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 11, | |
| "tokens_per_second_per_gpu": 12.11 | |
| }, | |
| { | |
| "epoch": 0.013039934800325998, | |
| "grad_norm": 0.49504613876342773, | |
| "learning_rate": 2.393906420021763e-06, | |
| "loss": 2.0462, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 12, | |
| "tokens_per_second_per_gpu": 8.61 | |
| }, | |
| { | |
| "epoch": 0.014126596033686498, | |
| "grad_norm": 0.49015769362449646, | |
| "learning_rate": 2.611534276387378e-06, | |
| "loss": 2.1, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 13, | |
| "tokens_per_second_per_gpu": 20.26 | |
| }, | |
| { | |
| "epoch": 0.015213257267046998, | |
| "grad_norm": 0.39164263010025024, | |
| "learning_rate": 2.8291621327529926e-06, | |
| "loss": 1.9896, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 14, | |
| "tokens_per_second_per_gpu": 9.35 | |
| }, | |
| { | |
| "epoch": 0.016299918500407497, | |
| "grad_norm": 0.47378551959991455, | |
| "learning_rate": 3.0467899891186073e-06, | |
| "loss": 1.9729, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 15, | |
| "tokens_per_second_per_gpu": 7.64 | |
| }, | |
| { | |
| "epoch": 0.017386579733767997, | |
| "grad_norm": 0.29168593883514404, | |
| "learning_rate": 3.2644178454842223e-06, | |
| "loss": 1.9753, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 16, | |
| "tokens_per_second_per_gpu": 14.42 | |
| }, | |
| { | |
| "epoch": 0.018473240967128497, | |
| "grad_norm": 0.380478173494339, | |
| "learning_rate": 3.482045701849837e-06, | |
| "loss": 1.9711, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 17, | |
| "tokens_per_second_per_gpu": 9.6 | |
| }, | |
| { | |
| "epoch": 0.019559902200488997, | |
| "grad_norm": 0.5212889313697815, | |
| "learning_rate": 3.6996735582154516e-06, | |
| "loss": 2.0233, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 18, | |
| "tokens_per_second_per_gpu": 11.03 | |
| }, | |
| { | |
| "epoch": 0.020646563433849498, | |
| "grad_norm": 0.45196524262428284, | |
| "learning_rate": 3.917301414581066e-06, | |
| "loss": 1.9769, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.57, | |
| "memory/max_allocated (GiB)": 72.57, | |
| "step": 19, | |
| "tokens_per_second_per_gpu": 4.46 | |
| }, | |
| { | |
| "epoch": 0.021733224667209998, | |
| "grad_norm": 0.28705745935440063, | |
| "learning_rate": 4.134929270946682e-06, | |
| "loss": 1.8988, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 20, | |
| "tokens_per_second_per_gpu": 24.55 | |
| }, | |
| { | |
| "epoch": 0.022819885900570498, | |
| "grad_norm": 0.4902697503566742, | |
| "learning_rate": 4.3525571273122965e-06, | |
| "loss": 1.8456, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 21, | |
| "tokens_per_second_per_gpu": 7.34 | |
| }, | |
| { | |
| "epoch": 0.023906547133931, | |
| "grad_norm": 0.4651252031326294, | |
| "learning_rate": 4.570184983677911e-06, | |
| "loss": 1.9303, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 22, | |
| "tokens_per_second_per_gpu": 7.38 | |
| }, | |
| { | |
| "epoch": 0.024993208367291495, | |
| "grad_norm": 0.43143412470817566, | |
| "learning_rate": 4.787812840043526e-06, | |
| "loss": 2.0909, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 23, | |
| "tokens_per_second_per_gpu": 10.79 | |
| }, | |
| { | |
| "epoch": 0.026079869600651995, | |
| "grad_norm": 0.6988353133201599, | |
| "learning_rate": 5.005440696409141e-06, | |
| "loss": 1.9828, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 24, | |
| "tokens_per_second_per_gpu": 8.67 | |
| }, | |
| { | |
| "epoch": 0.027166530834012496, | |
| "grad_norm": 0.4511033296585083, | |
| "learning_rate": 5.223068552774756e-06, | |
| "loss": 1.9453, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 25, | |
| "tokens_per_second_per_gpu": 13.06 | |
| }, | |
| { | |
| "epoch": 0.028253192067372996, | |
| "grad_norm": 0.7656753063201904, | |
| "learning_rate": 5.4406964091403706e-06, | |
| "loss": 2.0161, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 26, | |
| "tokens_per_second_per_gpu": 9.34 | |
| }, | |
| { | |
| "epoch": 0.029339853300733496, | |
| "grad_norm": 0.46474117040634155, | |
| "learning_rate": 5.658324265505985e-06, | |
| "loss": 1.9159, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 27, | |
| "tokens_per_second_per_gpu": 6.9 | |
| }, | |
| { | |
| "epoch": 0.030426514534093996, | |
| "grad_norm": 0.5297034382820129, | |
| "learning_rate": 5.8759521218716e-06, | |
| "loss": 1.9806, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 28, | |
| "tokens_per_second_per_gpu": 14.71 | |
| }, | |
| { | |
| "epoch": 0.03151317576745449, | |
| "grad_norm": 0.4147724211215973, | |
| "learning_rate": 6.0935799782372145e-06, | |
| "loss": 1.9809, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 29, | |
| "tokens_per_second_per_gpu": 9.11 | |
| }, | |
| { | |
| "epoch": 0.032599837000814993, | |
| "grad_norm": 0.37926042079925537, | |
| "learning_rate": 6.311207834602829e-06, | |
| "loss": 1.8517, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 30, | |
| "tokens_per_second_per_gpu": 6.78 | |
| }, | |
| { | |
| "epoch": 0.033686498234175494, | |
| "grad_norm": 0.45750337839126587, | |
| "learning_rate": 6.528835690968445e-06, | |
| "loss": 2.0566, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 31, | |
| "tokens_per_second_per_gpu": 7.22 | |
| }, | |
| { | |
| "epoch": 0.034773159467535994, | |
| "grad_norm": 0.3797982633113861, | |
| "learning_rate": 6.7464635473340585e-06, | |
| "loss": 1.9443, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 32, | |
| "tokens_per_second_per_gpu": 9.32 | |
| }, | |
| { | |
| "epoch": 0.035859820700896494, | |
| "grad_norm": 0.3069414794445038, | |
| "learning_rate": 6.964091403699674e-06, | |
| "loss": 1.8847, | |
| "memory/device_reserved (GiB)": 76.44, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 33, | |
| "tokens_per_second_per_gpu": 16.77 | |
| }, | |
| { | |
| "epoch": 0.036946481934256994, | |
| "grad_norm": 0.40665334463119507, | |
| "learning_rate": 7.1817192600652895e-06, | |
| "loss": 1.8539, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 34, | |
| "tokens_per_second_per_gpu": 14.71 | |
| }, | |
| { | |
| "epoch": 0.038033143167617495, | |
| "grad_norm": 0.31351128220558167, | |
| "learning_rate": 7.399347116430903e-06, | |
| "loss": 1.8252, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 35, | |
| "tokens_per_second_per_gpu": 13.1 | |
| }, | |
| { | |
| "epoch": 0.039119804400977995, | |
| "grad_norm": 0.33141955733299255, | |
| "learning_rate": 7.616974972796519e-06, | |
| "loss": 1.8449, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 36, | |
| "tokens_per_second_per_gpu": 13.33 | |
| }, | |
| { | |
| "epoch": 0.040206465634338495, | |
| "grad_norm": 0.42479243874549866, | |
| "learning_rate": 7.834602829162133e-06, | |
| "loss": 1.9523, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 37, | |
| "tokens_per_second_per_gpu": 7.55 | |
| }, | |
| { | |
| "epoch": 0.041293126867698995, | |
| "grad_norm": 0.40581783652305603, | |
| "learning_rate": 8.052230685527748e-06, | |
| "loss": 1.9402, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 38, | |
| "tokens_per_second_per_gpu": 14.8 | |
| }, | |
| { | |
| "epoch": 0.042379788101059496, | |
| "grad_norm": 0.35553255677223206, | |
| "learning_rate": 8.269858541893364e-06, | |
| "loss": 1.896, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 39, | |
| "tokens_per_second_per_gpu": 11.85 | |
| }, | |
| { | |
| "epoch": 0.043466449334419996, | |
| "grad_norm": 0.44665297865867615, | |
| "learning_rate": 8.487486398258977e-06, | |
| "loss": 1.9207, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 40, | |
| "tokens_per_second_per_gpu": 10.21 | |
| }, | |
| { | |
| "epoch": 0.044553110567780496, | |
| "grad_norm": 0.34954237937927246, | |
| "learning_rate": 8.705114254624593e-06, | |
| "loss": 1.846, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 41, | |
| "tokens_per_second_per_gpu": 14.95 | |
| }, | |
| { | |
| "epoch": 0.045639771801140996, | |
| "grad_norm": 0.3278484046459198, | |
| "learning_rate": 8.922742110990207e-06, | |
| "loss": 1.8059, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 42, | |
| "tokens_per_second_per_gpu": 7.62 | |
| }, | |
| { | |
| "epoch": 0.0467264330345015, | |
| "grad_norm": 0.35997408628463745, | |
| "learning_rate": 9.140369967355822e-06, | |
| "loss": 1.8217, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 43, | |
| "tokens_per_second_per_gpu": 8.14 | |
| }, | |
| { | |
| "epoch": 0.047813094267862, | |
| "grad_norm": 0.37220242619514465, | |
| "learning_rate": 9.357997823721438e-06, | |
| "loss": 1.879, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 44, | |
| "tokens_per_second_per_gpu": 12.91 | |
| }, | |
| { | |
| "epoch": 0.0488997555012225, | |
| "grad_norm": 0.26906973123550415, | |
| "learning_rate": 9.575625680087052e-06, | |
| "loss": 1.7932, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 45, | |
| "tokens_per_second_per_gpu": 13.48 | |
| }, | |
| { | |
| "epoch": 0.04998641673458299, | |
| "grad_norm": 0.34588104486465454, | |
| "learning_rate": 9.793253536452667e-06, | |
| "loss": 1.8384, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 46, | |
| "tokens_per_second_per_gpu": 9.64 | |
| }, | |
| { | |
| "epoch": 0.05107307796794349, | |
| "grad_norm": 0.277955025434494, | |
| "learning_rate": 1.0010881392818283e-05, | |
| "loss": 1.8192, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 47, | |
| "tokens_per_second_per_gpu": 13.16 | |
| }, | |
| { | |
| "epoch": 0.05215973920130399, | |
| "grad_norm": 0.5072764158248901, | |
| "learning_rate": 1.0228509249183896e-05, | |
| "loss": 1.8019, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 48, | |
| "tokens_per_second_per_gpu": 18.48 | |
| }, | |
| { | |
| "epoch": 0.05324640043466449, | |
| "grad_norm": 0.6679080724716187, | |
| "learning_rate": 1.0446137105549512e-05, | |
| "loss": 1.8317, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 49, | |
| "tokens_per_second_per_gpu": 22.34 | |
| }, | |
| { | |
| "epoch": 0.05433306166802499, | |
| "grad_norm": 0.355943500995636, | |
| "learning_rate": 1.0663764961915126e-05, | |
| "loss": 1.8473, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 50, | |
| "tokens_per_second_per_gpu": 7.76 | |
| }, | |
| { | |
| "epoch": 0.05541972290138549, | |
| "grad_norm": 0.7100318074226379, | |
| "learning_rate": 1.0881392818280741e-05, | |
| "loss": 1.8663, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 51, | |
| "tokens_per_second_per_gpu": 10.05 | |
| }, | |
| { | |
| "epoch": 0.05650638413474599, | |
| "grad_norm": 0.29079458117485046, | |
| "learning_rate": 1.1099020674646355e-05, | |
| "loss": 1.7866, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 52, | |
| "tokens_per_second_per_gpu": 11.77 | |
| }, | |
| { | |
| "epoch": 0.05759304536810649, | |
| "grad_norm": 0.277709037065506, | |
| "learning_rate": 1.131664853101197e-05, | |
| "loss": 1.6705, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 53, | |
| "tokens_per_second_per_gpu": 10.66 | |
| }, | |
| { | |
| "epoch": 0.05867970660146699, | |
| "grad_norm": 0.2641984820365906, | |
| "learning_rate": 1.1534276387377584e-05, | |
| "loss": 1.7629, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 54, | |
| "tokens_per_second_per_gpu": 8.33 | |
| }, | |
| { | |
| "epoch": 0.05976636783482749, | |
| "grad_norm": 0.24029381573200226, | |
| "learning_rate": 1.17519042437432e-05, | |
| "loss": 1.7173, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 55, | |
| "tokens_per_second_per_gpu": 10.63 | |
| }, | |
| { | |
| "epoch": 0.06085302906818799, | |
| "grad_norm": 0.31423497200012207, | |
| "learning_rate": 1.1969532100108814e-05, | |
| "loss": 1.8497, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 56, | |
| "tokens_per_second_per_gpu": 10.96 | |
| }, | |
| { | |
| "epoch": 0.06193969030154849, | |
| "grad_norm": 0.21849602460861206, | |
| "learning_rate": 1.2187159956474429e-05, | |
| "loss": 1.7107, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 57, | |
| "tokens_per_second_per_gpu": 22.23 | |
| }, | |
| { | |
| "epoch": 0.06302635153490899, | |
| "grad_norm": 0.2778118848800659, | |
| "learning_rate": 1.2404787812840043e-05, | |
| "loss": 1.7746, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 58, | |
| "tokens_per_second_per_gpu": 12.92 | |
| }, | |
| { | |
| "epoch": 0.06411301276826949, | |
| "grad_norm": 0.37199434638023376, | |
| "learning_rate": 1.2622415669205658e-05, | |
| "loss": 1.7745, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 59, | |
| "tokens_per_second_per_gpu": 7.23 | |
| }, | |
| { | |
| "epoch": 0.06519967400162999, | |
| "grad_norm": 0.3311302065849304, | |
| "learning_rate": 1.2840043525571274e-05, | |
| "loss": 1.7232, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 60, | |
| "tokens_per_second_per_gpu": 14.95 | |
| }, | |
| { | |
| "epoch": 0.06628633523499049, | |
| "grad_norm": 0.2378620058298111, | |
| "learning_rate": 1.305767138193689e-05, | |
| "loss": 1.6908, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 61, | |
| "tokens_per_second_per_gpu": 7.03 | |
| }, | |
| { | |
| "epoch": 0.06737299646835099, | |
| "grad_norm": 0.2607276141643524, | |
| "learning_rate": 1.3275299238302501e-05, | |
| "loss": 1.8013, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 62, | |
| "tokens_per_second_per_gpu": 12.32 | |
| }, | |
| { | |
| "epoch": 0.06845965770171149, | |
| "grad_norm": 0.3231821060180664, | |
| "learning_rate": 1.3492927094668117e-05, | |
| "loss": 1.7046, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 63, | |
| "tokens_per_second_per_gpu": 13.99 | |
| }, | |
| { | |
| "epoch": 0.06954631893507199, | |
| "grad_norm": 0.2653919458389282, | |
| "learning_rate": 1.3710554951033732e-05, | |
| "loss": 1.6847, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 64, | |
| "tokens_per_second_per_gpu": 14.23 | |
| }, | |
| { | |
| "epoch": 0.07063298016843249, | |
| "grad_norm": 0.25825828313827515, | |
| "learning_rate": 1.3928182807399348e-05, | |
| "loss": 1.7092, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 65, | |
| "tokens_per_second_per_gpu": 4.17 | |
| }, | |
| { | |
| "epoch": 0.07171964140179299, | |
| "grad_norm": 0.2830359935760498, | |
| "learning_rate": 1.4145810663764963e-05, | |
| "loss": 1.6906, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 66, | |
| "tokens_per_second_per_gpu": 10.64 | |
| }, | |
| { | |
| "epoch": 0.07280630263515349, | |
| "grad_norm": 0.2986491024494171, | |
| "learning_rate": 1.4363438520130579e-05, | |
| "loss": 1.7085, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 67, | |
| "tokens_per_second_per_gpu": 15.72 | |
| }, | |
| { | |
| "epoch": 0.07389296386851399, | |
| "grad_norm": 0.23011702299118042, | |
| "learning_rate": 1.4581066376496191e-05, | |
| "loss": 1.6954, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 68, | |
| "tokens_per_second_per_gpu": 14.35 | |
| }, | |
| { | |
| "epoch": 0.07497962510187449, | |
| "grad_norm": 0.34076130390167236, | |
| "learning_rate": 1.4798694232861807e-05, | |
| "loss": 1.6923, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 69, | |
| "tokens_per_second_per_gpu": 15.48 | |
| }, | |
| { | |
| "epoch": 0.07606628633523499, | |
| "grad_norm": 0.2995977997779846, | |
| "learning_rate": 1.5016322089227422e-05, | |
| "loss": 1.6535, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 70, | |
| "tokens_per_second_per_gpu": 12.57 | |
| }, | |
| { | |
| "epoch": 0.07715294756859549, | |
| "grad_norm": 0.26191699504852295, | |
| "learning_rate": 1.5233949945593038e-05, | |
| "loss": 1.6512, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 71, | |
| "tokens_per_second_per_gpu": 10.57 | |
| }, | |
| { | |
| "epoch": 0.07823960880195599, | |
| "grad_norm": 0.2458246797323227, | |
| "learning_rate": 1.545157780195865e-05, | |
| "loss": 1.6651, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 72, | |
| "tokens_per_second_per_gpu": 9.52 | |
| }, | |
| { | |
| "epoch": 0.07932627003531649, | |
| "grad_norm": 0.2605074346065521, | |
| "learning_rate": 1.5669205658324265e-05, | |
| "loss": 1.6524, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 73, | |
| "tokens_per_second_per_gpu": 7.08 | |
| }, | |
| { | |
| "epoch": 0.08041293126867699, | |
| "grad_norm": 0.32439377903938293, | |
| "learning_rate": 1.588683351468988e-05, | |
| "loss": 1.7285, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 74, | |
| "tokens_per_second_per_gpu": 14.99 | |
| }, | |
| { | |
| "epoch": 0.08149959250203749, | |
| "grad_norm": 0.24850843846797943, | |
| "learning_rate": 1.6104461371055496e-05, | |
| "loss": 1.648, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 75, | |
| "tokens_per_second_per_gpu": 11.46 | |
| }, | |
| { | |
| "epoch": 0.08258625373539799, | |
| "grad_norm": 0.269356906414032, | |
| "learning_rate": 1.632208922742111e-05, | |
| "loss": 1.6221, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 76, | |
| "tokens_per_second_per_gpu": 12.22 | |
| }, | |
| { | |
| "epoch": 0.08367291496875849, | |
| "grad_norm": 0.31279706954956055, | |
| "learning_rate": 1.6539717083786727e-05, | |
| "loss": 1.6597, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 77, | |
| "tokens_per_second_per_gpu": 13.72 | |
| }, | |
| { | |
| "epoch": 0.08475957620211899, | |
| "grad_norm": 0.4338330328464508, | |
| "learning_rate": 1.6757344940152338e-05, | |
| "loss": 1.5959, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 78, | |
| "tokens_per_second_per_gpu": 8.45 | |
| }, | |
| { | |
| "epoch": 0.08584623743547949, | |
| "grad_norm": 0.22105397284030914, | |
| "learning_rate": 1.6974972796517955e-05, | |
| "loss": 1.6385, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 79, | |
| "tokens_per_second_per_gpu": 10.33 | |
| }, | |
| { | |
| "epoch": 0.08693289866883999, | |
| "grad_norm": 0.17997324466705322, | |
| "learning_rate": 1.719260065288357e-05, | |
| "loss": 1.575, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 80, | |
| "tokens_per_second_per_gpu": 12.57 | |
| }, | |
| { | |
| "epoch": 0.08801955990220049, | |
| "grad_norm": 0.2515506446361542, | |
| "learning_rate": 1.7410228509249186e-05, | |
| "loss": 1.5705, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 81, | |
| "tokens_per_second_per_gpu": 10.02 | |
| }, | |
| { | |
| "epoch": 0.08910622113556099, | |
| "grad_norm": 0.27079910039901733, | |
| "learning_rate": 1.76278563656148e-05, | |
| "loss": 1.6829, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 82, | |
| "tokens_per_second_per_gpu": 16.61 | |
| }, | |
| { | |
| "epoch": 0.09019288236892149, | |
| "grad_norm": 0.3444445729255676, | |
| "learning_rate": 1.7845484221980413e-05, | |
| "loss": 1.6102, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 83, | |
| "tokens_per_second_per_gpu": 10.72 | |
| }, | |
| { | |
| "epoch": 0.09127954360228199, | |
| "grad_norm": 1.1343846321105957, | |
| "learning_rate": 1.8063112078346027e-05, | |
| "loss": 1.4927, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 84, | |
| "tokens_per_second_per_gpu": 21.47 | |
| }, | |
| { | |
| "epoch": 0.0923662048356425, | |
| "grad_norm": 0.2567317485809326, | |
| "learning_rate": 1.8280739934711644e-05, | |
| "loss": 1.545, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 85, | |
| "tokens_per_second_per_gpu": 7.55 | |
| }, | |
| { | |
| "epoch": 0.093452866069003, | |
| "grad_norm": 0.2513413727283478, | |
| "learning_rate": 1.8498367791077258e-05, | |
| "loss": 1.5424, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 86, | |
| "tokens_per_second_per_gpu": 9.61 | |
| }, | |
| { | |
| "epoch": 0.0945395273023635, | |
| "grad_norm": 0.28799301385879517, | |
| "learning_rate": 1.8715995647442875e-05, | |
| "loss": 1.5623, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 87, | |
| "tokens_per_second_per_gpu": 5.81 | |
| }, | |
| { | |
| "epoch": 0.095626188535724, | |
| "grad_norm": 0.21057067811489105, | |
| "learning_rate": 1.893362350380849e-05, | |
| "loss": 1.5007, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 88, | |
| "tokens_per_second_per_gpu": 15.02 | |
| }, | |
| { | |
| "epoch": 0.0967128497690845, | |
| "grad_norm": 0.3033576011657715, | |
| "learning_rate": 1.9151251360174103e-05, | |
| "loss": 1.5475, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 89, | |
| "tokens_per_second_per_gpu": 12.0 | |
| }, | |
| { | |
| "epoch": 0.097799511002445, | |
| "grad_norm": 0.25805121660232544, | |
| "learning_rate": 1.9368879216539717e-05, | |
| "loss": 1.6443, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 90, | |
| "tokens_per_second_per_gpu": 11.86 | |
| }, | |
| { | |
| "epoch": 0.0988861722358055, | |
| "grad_norm": 0.21924947202205658, | |
| "learning_rate": 1.9586507072905334e-05, | |
| "loss": 1.6589, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 91, | |
| "tokens_per_second_per_gpu": 10.56 | |
| }, | |
| { | |
| "epoch": 0.09997283346916598, | |
| "grad_norm": 0.3363969027996063, | |
| "learning_rate": 1.9804134929270948e-05, | |
| "loss": 1.6369, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 92, | |
| "tokens_per_second_per_gpu": 17.86 | |
| }, | |
| { | |
| "epoch": 0.10105949470252648, | |
| "grad_norm": 0.28643980622291565, | |
| "learning_rate": 2.0021762785636565e-05, | |
| "loss": 1.5086, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 93, | |
| "tokens_per_second_per_gpu": 9.02 | |
| }, | |
| { | |
| "epoch": 0.10214615593588698, | |
| "grad_norm": 0.39223000407218933, | |
| "learning_rate": 2.0239390642002175e-05, | |
| "loss": 1.6008, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 94, | |
| "tokens_per_second_per_gpu": 12.15 | |
| }, | |
| { | |
| "epoch": 0.10323281716924748, | |
| "grad_norm": 0.2880799174308777, | |
| "learning_rate": 2.0457018498367793e-05, | |
| "loss": 1.569, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 95, | |
| "tokens_per_second_per_gpu": 10.86 | |
| }, | |
| { | |
| "epoch": 0.10431947840260798, | |
| "grad_norm": 0.26068100333213806, | |
| "learning_rate": 2.0674646354733406e-05, | |
| "loss": 1.5852, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 96, | |
| "tokens_per_second_per_gpu": 11.28 | |
| }, | |
| { | |
| "epoch": 0.10540613963596848, | |
| "grad_norm": 0.36132219433784485, | |
| "learning_rate": 2.0892274211099024e-05, | |
| "loss": 1.5752, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 97, | |
| "tokens_per_second_per_gpu": 6.53 | |
| }, | |
| { | |
| "epoch": 0.10649280086932898, | |
| "grad_norm": 0.3137778341770172, | |
| "learning_rate": 2.1109902067464637e-05, | |
| "loss": 1.5851, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 98, | |
| "tokens_per_second_per_gpu": 20.68 | |
| }, | |
| { | |
| "epoch": 0.10757946210268948, | |
| "grad_norm": 0.25605323910713196, | |
| "learning_rate": 2.132752992383025e-05, | |
| "loss": 1.5163, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 99, | |
| "tokens_per_second_per_gpu": 9.63 | |
| }, | |
| { | |
| "epoch": 0.10866612333604998, | |
| "grad_norm": 0.3755965530872345, | |
| "learning_rate": 2.1545157780195865e-05, | |
| "loss": 1.5058, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 100, | |
| "tokens_per_second_per_gpu": 19.46 | |
| }, | |
| { | |
| "epoch": 0.10975278456941048, | |
| "grad_norm": 0.2792341411113739, | |
| "learning_rate": 2.1762785636561482e-05, | |
| "loss": 1.5316, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 101, | |
| "tokens_per_second_per_gpu": 14.59 | |
| }, | |
| { | |
| "epoch": 0.11083944580277098, | |
| "grad_norm": 0.2814246118068695, | |
| "learning_rate": 2.1980413492927096e-05, | |
| "loss": 1.4743, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 102, | |
| "tokens_per_second_per_gpu": 7.59 | |
| }, | |
| { | |
| "epoch": 0.11192610703613148, | |
| "grad_norm": 0.2572295367717743, | |
| "learning_rate": 2.219804134929271e-05, | |
| "loss": 1.5227, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 103, | |
| "tokens_per_second_per_gpu": 12.06 | |
| }, | |
| { | |
| "epoch": 0.11301276826949198, | |
| "grad_norm": 0.29003283381462097, | |
| "learning_rate": 2.2415669205658324e-05, | |
| "loss": 1.5116, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 104, | |
| "tokens_per_second_per_gpu": 10.89 | |
| }, | |
| { | |
| "epoch": 0.11409942950285248, | |
| "grad_norm": 0.28519585728645325, | |
| "learning_rate": 2.263329706202394e-05, | |
| "loss": 1.4856, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 105, | |
| "tokens_per_second_per_gpu": 11.62 | |
| }, | |
| { | |
| "epoch": 0.11518609073621298, | |
| "grad_norm": 0.2388896644115448, | |
| "learning_rate": 2.2850924918389555e-05, | |
| "loss": 1.4683, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 106, | |
| "tokens_per_second_per_gpu": 7.94 | |
| }, | |
| { | |
| "epoch": 0.11627275196957348, | |
| "grad_norm": 0.26784244179725647, | |
| "learning_rate": 2.306855277475517e-05, | |
| "loss": 1.5542, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 107, | |
| "tokens_per_second_per_gpu": 18.34 | |
| }, | |
| { | |
| "epoch": 0.11735941320293398, | |
| "grad_norm": 0.29058554768562317, | |
| "learning_rate": 2.3286180631120786e-05, | |
| "loss": 1.6059, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 108, | |
| "tokens_per_second_per_gpu": 10.91 | |
| }, | |
| { | |
| "epoch": 0.11844607443629449, | |
| "grad_norm": 0.3047916889190674, | |
| "learning_rate": 2.35038084874864e-05, | |
| "loss": 1.5123, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 109, | |
| "tokens_per_second_per_gpu": 8.84 | |
| }, | |
| { | |
| "epoch": 0.11953273566965499, | |
| "grad_norm": 0.2555934488773346, | |
| "learning_rate": 2.3721436343852013e-05, | |
| "loss": 1.4991, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 110, | |
| "tokens_per_second_per_gpu": 14.15 | |
| }, | |
| { | |
| "epoch": 0.12061939690301549, | |
| "grad_norm": 0.2994355261325836, | |
| "learning_rate": 2.3939064200217627e-05, | |
| "loss": 1.464, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 111, | |
| "tokens_per_second_per_gpu": 8.41 | |
| }, | |
| { | |
| "epoch": 0.12170605813637599, | |
| "grad_norm": 0.2840193212032318, | |
| "learning_rate": 2.4156692056583244e-05, | |
| "loss": 1.4107, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 112, | |
| "tokens_per_second_per_gpu": 3.97 | |
| }, | |
| { | |
| "epoch": 0.12279271936973649, | |
| "grad_norm": 0.40754270553588867, | |
| "learning_rate": 2.4374319912948858e-05, | |
| "loss": 1.5409, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 113, | |
| "tokens_per_second_per_gpu": 10.7 | |
| }, | |
| { | |
| "epoch": 0.12387938060309699, | |
| "grad_norm": 0.2855842113494873, | |
| "learning_rate": 2.4591947769314475e-05, | |
| "loss": 1.534, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 114, | |
| "tokens_per_second_per_gpu": 12.9 | |
| }, | |
| { | |
| "epoch": 0.12496604183645749, | |
| "grad_norm": 0.3067193627357483, | |
| "learning_rate": 2.4809575625680086e-05, | |
| "loss": 1.4645, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 115, | |
| "tokens_per_second_per_gpu": 7.59 | |
| }, | |
| { | |
| "epoch": 0.12605270306981797, | |
| "grad_norm": 0.2511027753353119, | |
| "learning_rate": 2.5027203482045703e-05, | |
| "loss": 1.5877, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 116, | |
| "tokens_per_second_per_gpu": 10.71 | |
| }, | |
| { | |
| "epoch": 0.1271393643031785, | |
| "grad_norm": 0.32290828227996826, | |
| "learning_rate": 2.5244831338411317e-05, | |
| "loss": 1.5267, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 117, | |
| "tokens_per_second_per_gpu": 4.34 | |
| }, | |
| { | |
| "epoch": 0.12822602553653897, | |
| "grad_norm": 0.2688813805580139, | |
| "learning_rate": 2.5462459194776934e-05, | |
| "loss": 1.4195, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 118, | |
| "tokens_per_second_per_gpu": 15.65 | |
| }, | |
| { | |
| "epoch": 0.1293126867698995, | |
| "grad_norm": 0.39141154289245605, | |
| "learning_rate": 2.5680087051142548e-05, | |
| "loss": 1.5449, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 119, | |
| "tokens_per_second_per_gpu": 7.85 | |
| }, | |
| { | |
| "epoch": 0.13039934800325997, | |
| "grad_norm": 0.26747387647628784, | |
| "learning_rate": 2.5897714907508165e-05, | |
| "loss": 1.5743, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 120, | |
| "tokens_per_second_per_gpu": 9.19 | |
| }, | |
| { | |
| "epoch": 0.1314860092366205, | |
| "grad_norm": 0.2591267228126526, | |
| "learning_rate": 2.611534276387378e-05, | |
| "loss": 1.5188, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 121, | |
| "tokens_per_second_per_gpu": 9.79 | |
| }, | |
| { | |
| "epoch": 0.13257267046998097, | |
| "grad_norm": 0.38989391922950745, | |
| "learning_rate": 2.6332970620239396e-05, | |
| "loss": 1.57, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 122, | |
| "tokens_per_second_per_gpu": 7.93 | |
| }, | |
| { | |
| "epoch": 0.1336593317033415, | |
| "grad_norm": 0.24863803386688232, | |
| "learning_rate": 2.6550598476605003e-05, | |
| "loss": 1.4488, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 123, | |
| "tokens_per_second_per_gpu": 10.66 | |
| }, | |
| { | |
| "epoch": 0.13474599293670197, | |
| "grad_norm": 0.31406891345977783, | |
| "learning_rate": 2.676822633297062e-05, | |
| "loss": 1.5277, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 124, | |
| "tokens_per_second_per_gpu": 11.44 | |
| }, | |
| { | |
| "epoch": 0.1358326541700625, | |
| "grad_norm": 0.32116463780403137, | |
| "learning_rate": 2.6985854189336234e-05, | |
| "loss": 1.5117, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 125, | |
| "tokens_per_second_per_gpu": 13.13 | |
| }, | |
| { | |
| "epoch": 0.13691931540342298, | |
| "grad_norm": 0.3352220058441162, | |
| "learning_rate": 2.720348204570185e-05, | |
| "loss": 1.4856, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 126, | |
| "tokens_per_second_per_gpu": 7.29 | |
| }, | |
| { | |
| "epoch": 0.1380059766367835, | |
| "grad_norm": 0.30594614148139954, | |
| "learning_rate": 2.7421109902067465e-05, | |
| "loss": 1.4479, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 127, | |
| "tokens_per_second_per_gpu": 22.33 | |
| }, | |
| { | |
| "epoch": 0.13909263787014398, | |
| "grad_norm": 0.3846677839756012, | |
| "learning_rate": 2.7638737758433082e-05, | |
| "loss": 1.4566, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 128, | |
| "tokens_per_second_per_gpu": 9.28 | |
| }, | |
| { | |
| "epoch": 0.1401792991035045, | |
| "grad_norm": 0.9043371081352234, | |
| "learning_rate": 2.7856365614798696e-05, | |
| "loss": 1.4797, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 129, | |
| "tokens_per_second_per_gpu": 13.71 | |
| }, | |
| { | |
| "epoch": 0.14126596033686498, | |
| "grad_norm": 0.3109076917171478, | |
| "learning_rate": 2.8073993471164313e-05, | |
| "loss": 1.4993, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 130, | |
| "tokens_per_second_per_gpu": 15.34 | |
| }, | |
| { | |
| "epoch": 0.1423526215702255, | |
| "grad_norm": 0.2666779160499573, | |
| "learning_rate": 2.8291621327529927e-05, | |
| "loss": 1.3997, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 131, | |
| "tokens_per_second_per_gpu": 11.35 | |
| }, | |
| { | |
| "epoch": 0.14343928280358598, | |
| "grad_norm": 0.37060707807540894, | |
| "learning_rate": 2.850924918389554e-05, | |
| "loss": 1.4573, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 132, | |
| "tokens_per_second_per_gpu": 15.36 | |
| }, | |
| { | |
| "epoch": 0.1445259440369465, | |
| "grad_norm": 0.22918947041034698, | |
| "learning_rate": 2.8726877040261158e-05, | |
| "loss": 1.3811, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 133, | |
| "tokens_per_second_per_gpu": 12.65 | |
| }, | |
| { | |
| "epoch": 0.14561260527030698, | |
| "grad_norm": 0.3369714617729187, | |
| "learning_rate": 2.894450489662677e-05, | |
| "loss": 1.4893, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 134, | |
| "tokens_per_second_per_gpu": 8.1 | |
| }, | |
| { | |
| "epoch": 0.1466992665036675, | |
| "grad_norm": 0.23793597519397736, | |
| "learning_rate": 2.9162132752992382e-05, | |
| "loss": 1.546, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 135, | |
| "tokens_per_second_per_gpu": 17.4 | |
| }, | |
| { | |
| "epoch": 0.14778592773702798, | |
| "grad_norm": 0.30026352405548096, | |
| "learning_rate": 2.9379760609358e-05, | |
| "loss": 1.4539, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 136, | |
| "tokens_per_second_per_gpu": 8.22 | |
| }, | |
| { | |
| "epoch": 0.1488725889703885, | |
| "grad_norm": 0.35859429836273193, | |
| "learning_rate": 2.9597388465723613e-05, | |
| "loss": 1.4538, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 137, | |
| "tokens_per_second_per_gpu": 12.48 | |
| }, | |
| { | |
| "epoch": 0.14995925020374898, | |
| "grad_norm": 0.3045945167541504, | |
| "learning_rate": 2.9815016322089227e-05, | |
| "loss": 1.5104, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 138, | |
| "tokens_per_second_per_gpu": 11.95 | |
| }, | |
| { | |
| "epoch": 0.1510459114371095, | |
| "grad_norm": 0.3489134907722473, | |
| "learning_rate": 3.0032644178454844e-05, | |
| "loss": 1.4705, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 139, | |
| "tokens_per_second_per_gpu": 13.89 | |
| }, | |
| { | |
| "epoch": 0.15213257267046998, | |
| "grad_norm": 0.40093690156936646, | |
| "learning_rate": 3.0250272034820458e-05, | |
| "loss": 1.4977, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 140, | |
| "tokens_per_second_per_gpu": 6.07 | |
| }, | |
| { | |
| "epoch": 0.1532192339038305, | |
| "grad_norm": 0.29712459444999695, | |
| "learning_rate": 3.0467899891186075e-05, | |
| "loss": 1.4557, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 141, | |
| "tokens_per_second_per_gpu": 8.95 | |
| }, | |
| { | |
| "epoch": 0.15430589513719098, | |
| "grad_norm": 0.29204878211021423, | |
| "learning_rate": 3.068552774755169e-05, | |
| "loss": 1.3598, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 142, | |
| "tokens_per_second_per_gpu": 7.25 | |
| }, | |
| { | |
| "epoch": 0.1553925563705515, | |
| "grad_norm": 0.40814992785453796, | |
| "learning_rate": 3.09031556039173e-05, | |
| "loss": 1.5122, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 143, | |
| "tokens_per_second_per_gpu": 11.98 | |
| }, | |
| { | |
| "epoch": 0.15647921760391198, | |
| "grad_norm": 0.2957916557788849, | |
| "learning_rate": 3.1120783460282917e-05, | |
| "loss": 1.4658, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 144, | |
| "tokens_per_second_per_gpu": 15.18 | |
| }, | |
| { | |
| "epoch": 0.1575658788372725, | |
| "grad_norm": 0.34953317046165466, | |
| "learning_rate": 3.133841131664853e-05, | |
| "loss": 1.4978, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 145, | |
| "tokens_per_second_per_gpu": 8.32 | |
| }, | |
| { | |
| "epoch": 0.15865254007063298, | |
| "grad_norm": 0.20158647000789642, | |
| "learning_rate": 3.1556039173014144e-05, | |
| "loss": 1.4494, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 146, | |
| "tokens_per_second_per_gpu": 17.64 | |
| }, | |
| { | |
| "epoch": 0.15973920130399347, | |
| "grad_norm": 0.39408671855926514, | |
| "learning_rate": 3.177366702937976e-05, | |
| "loss": 1.4845, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 147, | |
| "tokens_per_second_per_gpu": 10.64 | |
| }, | |
| { | |
| "epoch": 0.16082586253735398, | |
| "grad_norm": 0.5777578949928284, | |
| "learning_rate": 3.199129488574538e-05, | |
| "loss": 1.4023, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 148, | |
| "tokens_per_second_per_gpu": 15.12 | |
| }, | |
| { | |
| "epoch": 0.16191252377071447, | |
| "grad_norm": 0.3888914883136749, | |
| "learning_rate": 3.220892274211099e-05, | |
| "loss": 1.411, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 149, | |
| "tokens_per_second_per_gpu": 12.01 | |
| }, | |
| { | |
| "epoch": 0.16299918500407498, | |
| "grad_norm": 0.3578658998012543, | |
| "learning_rate": 3.2426550598476606e-05, | |
| "loss": 1.4251, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 150, | |
| "tokens_per_second_per_gpu": 5.84 | |
| }, | |
| { | |
| "epoch": 0.16408584623743547, | |
| "grad_norm": 0.34338808059692383, | |
| "learning_rate": 3.264417845484222e-05, | |
| "loss": 1.4228, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 151, | |
| "tokens_per_second_per_gpu": 8.35 | |
| }, | |
| { | |
| "epoch": 0.16517250747079598, | |
| "grad_norm": 0.4549909830093384, | |
| "learning_rate": 3.286180631120784e-05, | |
| "loss": 1.4411, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 152, | |
| "tokens_per_second_per_gpu": 3.86 | |
| }, | |
| { | |
| "epoch": 0.16625916870415647, | |
| "grad_norm": 0.6991769671440125, | |
| "learning_rate": 3.3079434167573454e-05, | |
| "loss": 1.4301, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 153, | |
| "tokens_per_second_per_gpu": 9.23 | |
| }, | |
| { | |
| "epoch": 0.16734582993751698, | |
| "grad_norm": 0.335506796836853, | |
| "learning_rate": 3.329706202393907e-05, | |
| "loss": 1.4708, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 154, | |
| "tokens_per_second_per_gpu": 9.89 | |
| }, | |
| { | |
| "epoch": 0.16843249117087747, | |
| "grad_norm": 0.31579703092575073, | |
| "learning_rate": 3.3514689880304675e-05, | |
| "loss": 1.4214, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 155, | |
| "tokens_per_second_per_gpu": 7.31 | |
| }, | |
| { | |
| "epoch": 0.16951915240423798, | |
| "grad_norm": 0.3945370316505432, | |
| "learning_rate": 3.3732317736670296e-05, | |
| "loss": 1.4327, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 156, | |
| "tokens_per_second_per_gpu": 8.81 | |
| }, | |
| { | |
| "epoch": 0.17060581363759847, | |
| "grad_norm": 0.3260517418384552, | |
| "learning_rate": 3.394994559303591e-05, | |
| "loss": 1.3634, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 157, | |
| "tokens_per_second_per_gpu": 15.38 | |
| }, | |
| { | |
| "epoch": 0.17169247487095898, | |
| "grad_norm": 0.2899768054485321, | |
| "learning_rate": 3.4167573449401523e-05, | |
| "loss": 1.4736, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 158, | |
| "tokens_per_second_per_gpu": 12.27 | |
| }, | |
| { | |
| "epoch": 0.17277913610431947, | |
| "grad_norm": 0.27285143733024597, | |
| "learning_rate": 3.438520130576714e-05, | |
| "loss": 1.3711, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 159, | |
| "tokens_per_second_per_gpu": 9.94 | |
| }, | |
| { | |
| "epoch": 0.17386579733767998, | |
| "grad_norm": 0.2547581195831299, | |
| "learning_rate": 3.460282916213276e-05, | |
| "loss": 1.3534, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 160, | |
| "tokens_per_second_per_gpu": 9.99 | |
| }, | |
| { | |
| "epoch": 0.17495245857104047, | |
| "grad_norm": 0.3566557765007019, | |
| "learning_rate": 3.482045701849837e-05, | |
| "loss": 1.5009, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 161, | |
| "tokens_per_second_per_gpu": 10.41 | |
| }, | |
| { | |
| "epoch": 0.17603911980440098, | |
| "grad_norm": 0.34070098400115967, | |
| "learning_rate": 3.5038084874863985e-05, | |
| "loss": 1.4655, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 162, | |
| "tokens_per_second_per_gpu": 17.99 | |
| }, | |
| { | |
| "epoch": 0.17712578103776147, | |
| "grad_norm": 0.2648494243621826, | |
| "learning_rate": 3.52557127312296e-05, | |
| "loss": 1.4366, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 163, | |
| "tokens_per_second_per_gpu": 12.61 | |
| }, | |
| { | |
| "epoch": 0.17821244227112198, | |
| "grad_norm": 0.32450008392333984, | |
| "learning_rate": 3.547334058759521e-05, | |
| "loss": 1.5348, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 164, | |
| "tokens_per_second_per_gpu": 8.44 | |
| }, | |
| { | |
| "epoch": 0.17929910350448247, | |
| "grad_norm": 0.2424926608800888, | |
| "learning_rate": 3.569096844396083e-05, | |
| "loss": 1.4597, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 165, | |
| "tokens_per_second_per_gpu": 10.99 | |
| }, | |
| { | |
| "epoch": 0.18038576473784299, | |
| "grad_norm": 0.32193416357040405, | |
| "learning_rate": 3.590859630032644e-05, | |
| "loss": 1.4483, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 166, | |
| "tokens_per_second_per_gpu": 16.6 | |
| }, | |
| { | |
| "epoch": 0.18147242597120347, | |
| "grad_norm": 0.3405689001083374, | |
| "learning_rate": 3.6126224156692054e-05, | |
| "loss": 1.3875, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 167, | |
| "tokens_per_second_per_gpu": 9.1 | |
| }, | |
| { | |
| "epoch": 0.18255908720456399, | |
| "grad_norm": 0.33213791251182556, | |
| "learning_rate": 3.6343852013057675e-05, | |
| "loss": 1.4764, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 168, | |
| "tokens_per_second_per_gpu": 8.63 | |
| }, | |
| { | |
| "epoch": 0.18364574843792447, | |
| "grad_norm": 0.27984389662742615, | |
| "learning_rate": 3.656147986942329e-05, | |
| "loss": 1.4225, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 169, | |
| "tokens_per_second_per_gpu": 20.07 | |
| }, | |
| { | |
| "epoch": 0.184732409671285, | |
| "grad_norm": 0.36502474546432495, | |
| "learning_rate": 3.67791077257889e-05, | |
| "loss": 1.4113, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 170, | |
| "tokens_per_second_per_gpu": 11.45 | |
| }, | |
| { | |
| "epoch": 0.18581907090464547, | |
| "grad_norm": 0.3809319734573364, | |
| "learning_rate": 3.6996735582154516e-05, | |
| "loss": 1.4225, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 171, | |
| "tokens_per_second_per_gpu": 14.65 | |
| }, | |
| { | |
| "epoch": 0.186905732138006, | |
| "grad_norm": 0.28744009137153625, | |
| "learning_rate": 3.721436343852013e-05, | |
| "loss": 1.4495, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 172, | |
| "tokens_per_second_per_gpu": 12.35 | |
| }, | |
| { | |
| "epoch": 0.18799239337136647, | |
| "grad_norm": 0.3467198610305786, | |
| "learning_rate": 3.743199129488575e-05, | |
| "loss": 1.4296, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 173, | |
| "tokens_per_second_per_gpu": 10.35 | |
| }, | |
| { | |
| "epoch": 0.189079054604727, | |
| "grad_norm": 0.564940333366394, | |
| "learning_rate": 3.7649619151251365e-05, | |
| "loss": 1.3606, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 174, | |
| "tokens_per_second_per_gpu": 8.31 | |
| }, | |
| { | |
| "epoch": 0.19016571583808747, | |
| "grad_norm": 0.313713401556015, | |
| "learning_rate": 3.786724700761698e-05, | |
| "loss": 1.3455, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 175, | |
| "tokens_per_second_per_gpu": 11.97 | |
| }, | |
| { | |
| "epoch": 0.191252377071448, | |
| "grad_norm": 0.3513742685317993, | |
| "learning_rate": 3.808487486398259e-05, | |
| "loss": 1.4907, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 176, | |
| "tokens_per_second_per_gpu": 12.56 | |
| }, | |
| { | |
| "epoch": 0.19233903830480847, | |
| "grad_norm": 0.2790919244289398, | |
| "learning_rate": 3.8302502720348206e-05, | |
| "loss": 1.322, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 177, | |
| "tokens_per_second_per_gpu": 18.57 | |
| }, | |
| { | |
| "epoch": 0.193425699538169, | |
| "grad_norm": 0.2438674122095108, | |
| "learning_rate": 3.852013057671382e-05, | |
| "loss": 1.424, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 178, | |
| "tokens_per_second_per_gpu": 14.74 | |
| }, | |
| { | |
| "epoch": 0.19451236077152947, | |
| "grad_norm": 0.3956673741340637, | |
| "learning_rate": 3.8737758433079434e-05, | |
| "loss": 1.4282, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 179, | |
| "tokens_per_second_per_gpu": 10.89 | |
| }, | |
| { | |
| "epoch": 0.19559902200489, | |
| "grad_norm": 0.3878747522830963, | |
| "learning_rate": 3.895538628944505e-05, | |
| "loss": 1.4225, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 180, | |
| "tokens_per_second_per_gpu": 6.92 | |
| }, | |
| { | |
| "epoch": 0.19668568323825047, | |
| "grad_norm": 0.26273518800735474, | |
| "learning_rate": 3.917301414581067e-05, | |
| "loss": 1.3401, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 181, | |
| "tokens_per_second_per_gpu": 13.06 | |
| }, | |
| { | |
| "epoch": 0.197772344471611, | |
| "grad_norm": 0.36708489060401917, | |
| "learning_rate": 3.939064200217628e-05, | |
| "loss": 1.4634, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 182, | |
| "tokens_per_second_per_gpu": 10.9 | |
| }, | |
| { | |
| "epoch": 0.19885900570497148, | |
| "grad_norm": 0.2908684015274048, | |
| "learning_rate": 3.9608269858541896e-05, | |
| "loss": 1.3991, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 183, | |
| "tokens_per_second_per_gpu": 8.87 | |
| }, | |
| { | |
| "epoch": 0.19994566693833196, | |
| "grad_norm": 0.3229577839374542, | |
| "learning_rate": 3.982589771490751e-05, | |
| "loss": 1.388, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 184, | |
| "tokens_per_second_per_gpu": 5.55 | |
| }, | |
| { | |
| "epoch": 0.20103232817169248, | |
| "grad_norm": 0.3783496916294098, | |
| "learning_rate": 4.004352557127313e-05, | |
| "loss": 1.4561, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 185, | |
| "tokens_per_second_per_gpu": 13.03 | |
| }, | |
| { | |
| "epoch": 0.20211898940505296, | |
| "grad_norm": 0.314597487449646, | |
| "learning_rate": 4.026115342763874e-05, | |
| "loss": 1.4162, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 186, | |
| "tokens_per_second_per_gpu": 11.69 | |
| }, | |
| { | |
| "epoch": 0.20320565063841348, | |
| "grad_norm": 0.3937665820121765, | |
| "learning_rate": 4.047878128400435e-05, | |
| "loss": 1.3458, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 187, | |
| "tokens_per_second_per_gpu": 12.93 | |
| }, | |
| { | |
| "epoch": 0.20429231187177396, | |
| "grad_norm": 0.37394386529922485, | |
| "learning_rate": 4.0696409140369965e-05, | |
| "loss": 1.4338, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 188, | |
| "tokens_per_second_per_gpu": 10.41 | |
| }, | |
| { | |
| "epoch": 0.20537897310513448, | |
| "grad_norm": 0.3613446354866028, | |
| "learning_rate": 4.0914036996735585e-05, | |
| "loss": 1.3917, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 189, | |
| "tokens_per_second_per_gpu": 7.67 | |
| }, | |
| { | |
| "epoch": 0.20646563433849496, | |
| "grad_norm": 0.36117660999298096, | |
| "learning_rate": 4.11316648531012e-05, | |
| "loss": 1.4352, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 190, | |
| "tokens_per_second_per_gpu": 12.25 | |
| }, | |
| { | |
| "epoch": 0.20755229557185548, | |
| "grad_norm": 0.3493390679359436, | |
| "learning_rate": 4.134929270946681e-05, | |
| "loss": 1.4273, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 191, | |
| "tokens_per_second_per_gpu": 16.61 | |
| }, | |
| { | |
| "epoch": 0.20863895680521596, | |
| "grad_norm": 0.29430046677589417, | |
| "learning_rate": 4.156692056583243e-05, | |
| "loss": 1.3934, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 192, | |
| "tokens_per_second_per_gpu": 17.26 | |
| }, | |
| { | |
| "epoch": 0.20972561803857648, | |
| "grad_norm": 0.2831302285194397, | |
| "learning_rate": 4.178454842219805e-05, | |
| "loss": 1.4511, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 193, | |
| "tokens_per_second_per_gpu": 13.76 | |
| }, | |
| { | |
| "epoch": 0.21081227927193696, | |
| "grad_norm": 0.2867391109466553, | |
| "learning_rate": 4.200217627856366e-05, | |
| "loss": 1.4039, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 194, | |
| "tokens_per_second_per_gpu": 12.15 | |
| }, | |
| { | |
| "epoch": 0.21189894050529748, | |
| "grad_norm": 0.2953890562057495, | |
| "learning_rate": 4.2219804134929275e-05, | |
| "loss": 1.3411, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 195, | |
| "tokens_per_second_per_gpu": 4.21 | |
| }, | |
| { | |
| "epoch": 0.21298560173865796, | |
| "grad_norm": 0.35373008251190186, | |
| "learning_rate": 4.243743199129489e-05, | |
| "loss": 1.3621, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 196, | |
| "tokens_per_second_per_gpu": 10.48 | |
| }, | |
| { | |
| "epoch": 0.21407226297201848, | |
| "grad_norm": 0.32863089442253113, | |
| "learning_rate": 4.26550598476605e-05, | |
| "loss": 1.3735, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 197, | |
| "tokens_per_second_per_gpu": 13.72 | |
| }, | |
| { | |
| "epoch": 0.21515892420537897, | |
| "grad_norm": 0.2762220501899719, | |
| "learning_rate": 4.2872687704026116e-05, | |
| "loss": 1.3078, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 198, | |
| "tokens_per_second_per_gpu": 7.48 | |
| }, | |
| { | |
| "epoch": 0.21624558543873948, | |
| "grad_norm": 0.3151223063468933, | |
| "learning_rate": 4.309031556039173e-05, | |
| "loss": 1.5015, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 199, | |
| "tokens_per_second_per_gpu": 10.97 | |
| }, | |
| { | |
| "epoch": 0.21733224667209997, | |
| "grad_norm": 0.379566490650177, | |
| "learning_rate": 4.3307943416757344e-05, | |
| "loss": 1.3878, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 200, | |
| "tokens_per_second_per_gpu": 11.86 | |
| }, | |
| { | |
| "epoch": 0.21841890790546048, | |
| "grad_norm": 0.3544406592845917, | |
| "learning_rate": 4.3525571273122965e-05, | |
| "loss": 1.4205, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 201, | |
| "tokens_per_second_per_gpu": 11.6 | |
| }, | |
| { | |
| "epoch": 0.21950556913882097, | |
| "grad_norm": 0.3932376205921173, | |
| "learning_rate": 4.374319912948858e-05, | |
| "loss": 1.2918, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 202, | |
| "tokens_per_second_per_gpu": 16.91 | |
| }, | |
| { | |
| "epoch": 0.22059223037218148, | |
| "grad_norm": 0.357577919960022, | |
| "learning_rate": 4.396082698585419e-05, | |
| "loss": 1.407, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 203, | |
| "tokens_per_second_per_gpu": 5.77 | |
| }, | |
| { | |
| "epoch": 0.22167889160554197, | |
| "grad_norm": 0.3274465799331665, | |
| "learning_rate": 4.4178454842219806e-05, | |
| "loss": 1.2821, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 204, | |
| "tokens_per_second_per_gpu": 8.75 | |
| }, | |
| { | |
| "epoch": 0.22276555283890248, | |
| "grad_norm": 0.43822312355041504, | |
| "learning_rate": 4.439608269858542e-05, | |
| "loss": 1.4738, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 205, | |
| "tokens_per_second_per_gpu": 14.05 | |
| }, | |
| { | |
| "epoch": 0.22385221407226297, | |
| "grad_norm": 0.3124350905418396, | |
| "learning_rate": 4.461371055495104e-05, | |
| "loss": 1.4514, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 206, | |
| "tokens_per_second_per_gpu": 9.28 | |
| }, | |
| { | |
| "epoch": 0.22493887530562348, | |
| "grad_norm": 0.3295533359050751, | |
| "learning_rate": 4.483133841131665e-05, | |
| "loss": 1.3101, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 207, | |
| "tokens_per_second_per_gpu": 6.84 | |
| }, | |
| { | |
| "epoch": 0.22602553653898397, | |
| "grad_norm": 0.4384583830833435, | |
| "learning_rate": 4.504896626768226e-05, | |
| "loss": 1.4138, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 208, | |
| "tokens_per_second_per_gpu": 12.07 | |
| }, | |
| { | |
| "epoch": 0.22711219777234448, | |
| "grad_norm": 0.325058251619339, | |
| "learning_rate": 4.526659412404788e-05, | |
| "loss": 1.3437, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 209, | |
| "tokens_per_second_per_gpu": 11.0 | |
| }, | |
| { | |
| "epoch": 0.22819885900570497, | |
| "grad_norm": 0.44415050745010376, | |
| "learning_rate": 4.5484221980413496e-05, | |
| "loss": 1.4077, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 210, | |
| "tokens_per_second_per_gpu": 13.72 | |
| }, | |
| { | |
| "epoch": 0.22928552023906548, | |
| "grad_norm": 0.2983197867870331, | |
| "learning_rate": 4.570184983677911e-05, | |
| "loss": 1.407, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 211, | |
| "tokens_per_second_per_gpu": 11.77 | |
| }, | |
| { | |
| "epoch": 0.23037218147242597, | |
| "grad_norm": 0.30090367794036865, | |
| "learning_rate": 4.591947769314472e-05, | |
| "loss": 1.3989, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 212, | |
| "tokens_per_second_per_gpu": 8.26 | |
| }, | |
| { | |
| "epoch": 0.23145884270578648, | |
| "grad_norm": 0.4387713670730591, | |
| "learning_rate": 4.613710554951034e-05, | |
| "loss": 1.4221, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 213, | |
| "tokens_per_second_per_gpu": 9.89 | |
| }, | |
| { | |
| "epoch": 0.23254550393914697, | |
| "grad_norm": 0.42906421422958374, | |
| "learning_rate": 4.635473340587596e-05, | |
| "loss": 1.3376, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 214, | |
| "tokens_per_second_per_gpu": 7.1 | |
| }, | |
| { | |
| "epoch": 0.23363216517250748, | |
| "grad_norm": 0.31574541330337524, | |
| "learning_rate": 4.657236126224157e-05, | |
| "loss": 1.3963, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 215, | |
| "tokens_per_second_per_gpu": 19.5 | |
| }, | |
| { | |
| "epoch": 0.23471882640586797, | |
| "grad_norm": 0.30392923951148987, | |
| "learning_rate": 4.6789989118607185e-05, | |
| "loss": 1.389, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 216, | |
| "tokens_per_second_per_gpu": 14.0 | |
| }, | |
| { | |
| "epoch": 0.23580548763922848, | |
| "grad_norm": 0.28484299778938293, | |
| "learning_rate": 4.70076169749728e-05, | |
| "loss": 1.4629, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 217, | |
| "tokens_per_second_per_gpu": 11.45 | |
| }, | |
| { | |
| "epoch": 0.23689214887258897, | |
| "grad_norm": 0.3333740830421448, | |
| "learning_rate": 4.722524483133841e-05, | |
| "loss": 1.4045, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 218, | |
| "tokens_per_second_per_gpu": 12.52 | |
| }, | |
| { | |
| "epoch": 0.23797881010594946, | |
| "grad_norm": 0.4336683750152588, | |
| "learning_rate": 4.744287268770403e-05, | |
| "loss": 1.35, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 219, | |
| "tokens_per_second_per_gpu": 14.74 | |
| }, | |
| { | |
| "epoch": 0.23906547133930997, | |
| "grad_norm": 0.3378277122974396, | |
| "learning_rate": 4.766050054406964e-05, | |
| "loss": 1.3772, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 220, | |
| "tokens_per_second_per_gpu": 14.21 | |
| }, | |
| { | |
| "epoch": 0.24015213257267046, | |
| "grad_norm": 0.23571667075157166, | |
| "learning_rate": 4.7878128400435254e-05, | |
| "loss": 1.4222, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 221, | |
| "tokens_per_second_per_gpu": 10.85 | |
| }, | |
| { | |
| "epoch": 0.24123879380603097, | |
| "grad_norm": 0.2858165502548218, | |
| "learning_rate": 4.8095756256800875e-05, | |
| "loss": 1.3721, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 222, | |
| "tokens_per_second_per_gpu": 16.99 | |
| }, | |
| { | |
| "epoch": 0.24232545503939146, | |
| "grad_norm": 0.42852750420570374, | |
| "learning_rate": 4.831338411316649e-05, | |
| "loss": 1.3627, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 223, | |
| "tokens_per_second_per_gpu": 11.97 | |
| }, | |
| { | |
| "epoch": 0.24341211627275197, | |
| "grad_norm": 0.334712415933609, | |
| "learning_rate": 4.85310119695321e-05, | |
| "loss": 1.401, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 224, | |
| "tokens_per_second_per_gpu": 16.26 | |
| }, | |
| { | |
| "epoch": 0.24449877750611246, | |
| "grad_norm": 0.7235128879547119, | |
| "learning_rate": 4.8748639825897716e-05, | |
| "loss": 1.4391, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 225, | |
| "tokens_per_second_per_gpu": 22.41 | |
| }, | |
| { | |
| "epoch": 0.24558543873947297, | |
| "grad_norm": 0.23709718883037567, | |
| "learning_rate": 4.896626768226334e-05, | |
| "loss": 1.4614, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 226, | |
| "tokens_per_second_per_gpu": 11.84 | |
| }, | |
| { | |
| "epoch": 0.24667209997283346, | |
| "grad_norm": 0.524126410484314, | |
| "learning_rate": 4.918389553862895e-05, | |
| "loss": 1.4045, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.57, | |
| "memory/max_allocated (GiB)": 72.57, | |
| "step": 227, | |
| "tokens_per_second_per_gpu": 6.31 | |
| }, | |
| { | |
| "epoch": 0.24775876120619397, | |
| "grad_norm": 0.4646899104118347, | |
| "learning_rate": 4.9401523394994564e-05, | |
| "loss": 1.3627, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 228, | |
| "tokens_per_second_per_gpu": 6.22 | |
| }, | |
| { | |
| "epoch": 0.24884542243955446, | |
| "grad_norm": 0.38769787549972534, | |
| "learning_rate": 4.961915125136017e-05, | |
| "loss": 1.4152, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 229, | |
| "tokens_per_second_per_gpu": 12.15 | |
| }, | |
| { | |
| "epoch": 0.24993208367291497, | |
| "grad_norm": 0.4485413730144501, | |
| "learning_rate": 4.983677910772579e-05, | |
| "loss": 1.4628, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 230, | |
| "tokens_per_second_per_gpu": 7.95 | |
| }, | |
| { | |
| "epoch": 0.25101874490627546, | |
| "grad_norm": 0.3511171340942383, | |
| "learning_rate": 5.0054406964091406e-05, | |
| "loss": 1.3475, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 231, | |
| "tokens_per_second_per_gpu": 8.64 | |
| }, | |
| { | |
| "epoch": 0.25210540613963595, | |
| "grad_norm": 0.33919912576675415, | |
| "learning_rate": 5.0272034820457026e-05, | |
| "loss": 1.3519, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 232, | |
| "tokens_per_second_per_gpu": 11.09 | |
| }, | |
| { | |
| "epoch": 0.2531920673729965, | |
| "grad_norm": 0.38596466183662415, | |
| "learning_rate": 5.0489662676822633e-05, | |
| "loss": 1.5284, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 233, | |
| "tokens_per_second_per_gpu": 9.4 | |
| }, | |
| { | |
| "epoch": 0.254278728606357, | |
| "grad_norm": 0.38259157538414, | |
| "learning_rate": 5.070729053318825e-05, | |
| "loss": 1.336, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 234, | |
| "tokens_per_second_per_gpu": 10.32 | |
| }, | |
| { | |
| "epoch": 0.25536538983971746, | |
| "grad_norm": 0.31562358140945435, | |
| "learning_rate": 5.092491838955387e-05, | |
| "loss": 1.2886, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 235, | |
| "tokens_per_second_per_gpu": 8.83 | |
| }, | |
| { | |
| "epoch": 0.25645205107307795, | |
| "grad_norm": 0.5001741051673889, | |
| "learning_rate": 5.1142546245919475e-05, | |
| "loss": 1.3563, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 236, | |
| "tokens_per_second_per_gpu": 13.55 | |
| }, | |
| { | |
| "epoch": 0.2575387123064385, | |
| "grad_norm": 0.33317190408706665, | |
| "learning_rate": 5.1360174102285095e-05, | |
| "loss": 1.3302, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 237, | |
| "tokens_per_second_per_gpu": 20.65 | |
| }, | |
| { | |
| "epoch": 0.258625373539799, | |
| "grad_norm": 0.3749401271343231, | |
| "learning_rate": 5.157780195865071e-05, | |
| "loss": 1.4049, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 238, | |
| "tokens_per_second_per_gpu": 9.57 | |
| }, | |
| { | |
| "epoch": 0.25971203477315946, | |
| "grad_norm": 0.475163996219635, | |
| "learning_rate": 5.179542981501633e-05, | |
| "loss": 1.4126, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 239, | |
| "tokens_per_second_per_gpu": 5.95 | |
| }, | |
| { | |
| "epoch": 0.26079869600651995, | |
| "grad_norm": 0.7957274913787842, | |
| "learning_rate": 5.201305767138194e-05, | |
| "loss": 1.4331, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 240, | |
| "tokens_per_second_per_gpu": 5.98 | |
| }, | |
| { | |
| "epoch": 0.2618853572398805, | |
| "grad_norm": 0.35523027181625366, | |
| "learning_rate": 5.223068552774756e-05, | |
| "loss": 1.3937, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 241, | |
| "tokens_per_second_per_gpu": 11.78 | |
| }, | |
| { | |
| "epoch": 0.262972018473241, | |
| "grad_norm": 0.31284523010253906, | |
| "learning_rate": 5.244831338411317e-05, | |
| "loss": 1.3867, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 242, | |
| "tokens_per_second_per_gpu": 15.51 | |
| }, | |
| { | |
| "epoch": 0.26405867970660146, | |
| "grad_norm": 0.27561265230178833, | |
| "learning_rate": 5.266594124047879e-05, | |
| "loss": 1.3665, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 243, | |
| "tokens_per_second_per_gpu": 15.24 | |
| }, | |
| { | |
| "epoch": 0.26514534093996195, | |
| "grad_norm": 0.3205089867115021, | |
| "learning_rate": 5.28835690968444e-05, | |
| "loss": 1.4183, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 244, | |
| "tokens_per_second_per_gpu": 13.83 | |
| }, | |
| { | |
| "epoch": 0.2662320021733225, | |
| "grad_norm": 0.3255348205566406, | |
| "learning_rate": 5.3101196953210006e-05, | |
| "loss": 1.3063, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 245, | |
| "tokens_per_second_per_gpu": 11.45 | |
| }, | |
| { | |
| "epoch": 0.267318663406683, | |
| "grad_norm": 0.34641212224960327, | |
| "learning_rate": 5.3318824809575626e-05, | |
| "loss": 1.3408, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 246, | |
| "tokens_per_second_per_gpu": 12.28 | |
| }, | |
| { | |
| "epoch": 0.26840532464004346, | |
| "grad_norm": 0.34771260619163513, | |
| "learning_rate": 5.353645266594124e-05, | |
| "loss": 1.4253, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 247, | |
| "tokens_per_second_per_gpu": 12.58 | |
| }, | |
| { | |
| "epoch": 0.26949198587340395, | |
| "grad_norm": 0.42693275213241577, | |
| "learning_rate": 5.375408052230686e-05, | |
| "loss": 1.3531, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 248, | |
| "tokens_per_second_per_gpu": 13.29 | |
| }, | |
| { | |
| "epoch": 0.2705786471067645, | |
| "grad_norm": 0.24540139734745026, | |
| "learning_rate": 5.397170837867247e-05, | |
| "loss": 1.4186, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 249, | |
| "tokens_per_second_per_gpu": 5.48 | |
| }, | |
| { | |
| "epoch": 0.271665308340125, | |
| "grad_norm": 0.32208290696144104, | |
| "learning_rate": 5.418933623503809e-05, | |
| "loss": 1.4204, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 250, | |
| "tokens_per_second_per_gpu": 4.79 | |
| }, | |
| { | |
| "epoch": 0.27275196957348546, | |
| "grad_norm": 0.28593209385871887, | |
| "learning_rate": 5.44069640914037e-05, | |
| "loss": 1.333, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 251, | |
| "tokens_per_second_per_gpu": 15.89 | |
| }, | |
| { | |
| "epoch": 0.27383863080684595, | |
| "grad_norm": 0.28090038895606995, | |
| "learning_rate": 5.462459194776932e-05, | |
| "loss": 1.3369, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 252, | |
| "tokens_per_second_per_gpu": 14.31 | |
| }, | |
| { | |
| "epoch": 0.2749252920402065, | |
| "grad_norm": 0.31984612345695496, | |
| "learning_rate": 5.484221980413493e-05, | |
| "loss": 1.3276, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 253, | |
| "tokens_per_second_per_gpu": 11.32 | |
| }, | |
| { | |
| "epoch": 0.276011953273567, | |
| "grad_norm": 0.2895549237728119, | |
| "learning_rate": 5.505984766050055e-05, | |
| "loss": 1.4077, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 254, | |
| "tokens_per_second_per_gpu": 15.6 | |
| }, | |
| { | |
| "epoch": 0.27709861450692747, | |
| "grad_norm": 0.23163455724716187, | |
| "learning_rate": 5.5277475516866164e-05, | |
| "loss": 1.4143, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 255, | |
| "tokens_per_second_per_gpu": 9.27 | |
| }, | |
| { | |
| "epoch": 0.27818527574028795, | |
| "grad_norm": 0.34795793890953064, | |
| "learning_rate": 5.549510337323177e-05, | |
| "loss": 1.3948, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 256, | |
| "tokens_per_second_per_gpu": 12.63 | |
| }, | |
| { | |
| "epoch": 0.27927193697364844, | |
| "grad_norm": 0.6224808692932129, | |
| "learning_rate": 5.571273122959739e-05, | |
| "loss": 1.3397, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 257, | |
| "tokens_per_second_per_gpu": 12.32 | |
| }, | |
| { | |
| "epoch": 0.280358598207009, | |
| "grad_norm": 0.46499934792518616, | |
| "learning_rate": 5.5930359085963e-05, | |
| "loss": 1.4027, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 258, | |
| "tokens_per_second_per_gpu": 10.76 | |
| }, | |
| { | |
| "epoch": 0.28144525944036947, | |
| "grad_norm": 0.3597241938114166, | |
| "learning_rate": 5.6147986942328626e-05, | |
| "loss": 1.3146, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 259, | |
| "tokens_per_second_per_gpu": 14.8 | |
| }, | |
| { | |
| "epoch": 0.28253192067372995, | |
| "grad_norm": 0.3473433554172516, | |
| "learning_rate": 5.636561479869423e-05, | |
| "loss": 1.3467, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 260, | |
| "tokens_per_second_per_gpu": 26.5 | |
| }, | |
| { | |
| "epoch": 0.28361858190709044, | |
| "grad_norm": 0.2812510132789612, | |
| "learning_rate": 5.6583242655059854e-05, | |
| "loss": 1.3229, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 261, | |
| "tokens_per_second_per_gpu": 10.09 | |
| }, | |
| { | |
| "epoch": 0.284705243140451, | |
| "grad_norm": 0.4601527452468872, | |
| "learning_rate": 5.680087051142546e-05, | |
| "loss": 1.349, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 262, | |
| "tokens_per_second_per_gpu": 7.82 | |
| }, | |
| { | |
| "epoch": 0.28579190437381147, | |
| "grad_norm": 0.326623797416687, | |
| "learning_rate": 5.701849836779108e-05, | |
| "loss": 1.3685, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 263, | |
| "tokens_per_second_per_gpu": 8.57 | |
| }, | |
| { | |
| "epoch": 0.28687856560717195, | |
| "grad_norm": 0.37015482783317566, | |
| "learning_rate": 5.7236126224156695e-05, | |
| "loss": 1.3301, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 264, | |
| "tokens_per_second_per_gpu": 17.81 | |
| }, | |
| { | |
| "epoch": 0.28796522684053244, | |
| "grad_norm": 0.3805665075778961, | |
| "learning_rate": 5.7453754080522316e-05, | |
| "loss": 1.3553, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 265, | |
| "tokens_per_second_per_gpu": 11.99 | |
| }, | |
| { | |
| "epoch": 0.289051888073893, | |
| "grad_norm": 0.31079602241516113, | |
| "learning_rate": 5.767138193688792e-05, | |
| "loss": 1.3156, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 266, | |
| "tokens_per_second_per_gpu": 3.84 | |
| }, | |
| { | |
| "epoch": 0.29013854930725347, | |
| "grad_norm": 0.31146419048309326, | |
| "learning_rate": 5.788900979325354e-05, | |
| "loss": 1.3991, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 267, | |
| "tokens_per_second_per_gpu": 13.78 | |
| }, | |
| { | |
| "epoch": 0.29122521054061395, | |
| "grad_norm": 0.2825380563735962, | |
| "learning_rate": 5.810663764961916e-05, | |
| "loss": 1.3302, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 268, | |
| "tokens_per_second_per_gpu": 10.56 | |
| }, | |
| { | |
| "epoch": 0.29231187177397444, | |
| "grad_norm": 0.2612122893333435, | |
| "learning_rate": 5.8324265505984764e-05, | |
| "loss": 1.3334, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 269, | |
| "tokens_per_second_per_gpu": 7.18 | |
| }, | |
| { | |
| "epoch": 0.293398533007335, | |
| "grad_norm": 0.29406338930130005, | |
| "learning_rate": 5.8541893362350385e-05, | |
| "loss": 1.3807, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 270, | |
| "tokens_per_second_per_gpu": 13.25 | |
| }, | |
| { | |
| "epoch": 0.29448519424069547, | |
| "grad_norm": 0.2625284492969513, | |
| "learning_rate": 5.8759521218716e-05, | |
| "loss": 1.3292, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 271, | |
| "tokens_per_second_per_gpu": 16.06 | |
| }, | |
| { | |
| "epoch": 0.29557185547405596, | |
| "grad_norm": 0.29185494780540466, | |
| "learning_rate": 5.897714907508162e-05, | |
| "loss": 1.3265, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 272, | |
| "tokens_per_second_per_gpu": 9.4 | |
| }, | |
| { | |
| "epoch": 0.29665851670741644, | |
| "grad_norm": 0.6952967047691345, | |
| "learning_rate": 5.9194776931447226e-05, | |
| "loss": 1.3592, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 273, | |
| "tokens_per_second_per_gpu": 8.86 | |
| }, | |
| { | |
| "epoch": 0.297745177940777, | |
| "grad_norm": 0.32257288694381714, | |
| "learning_rate": 5.941240478781285e-05, | |
| "loss": 1.3248, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 274, | |
| "tokens_per_second_per_gpu": 8.22 | |
| }, | |
| { | |
| "epoch": 0.29883183917413747, | |
| "grad_norm": 0.3245685398578644, | |
| "learning_rate": 5.9630032644178454e-05, | |
| "loss": 1.3602, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 275, | |
| "tokens_per_second_per_gpu": 17.39 | |
| }, | |
| { | |
| "epoch": 0.29991850040749796, | |
| "grad_norm": 0.37213394045829773, | |
| "learning_rate": 5.984766050054407e-05, | |
| "loss": 1.3104, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 276, | |
| "tokens_per_second_per_gpu": 15.06 | |
| }, | |
| { | |
| "epoch": 0.30100516164085844, | |
| "grad_norm": 0.2887786030769348, | |
| "learning_rate": 6.006528835690969e-05, | |
| "loss": 1.2689, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 277, | |
| "tokens_per_second_per_gpu": 10.1 | |
| }, | |
| { | |
| "epoch": 0.302091822874219, | |
| "grad_norm": 0.33155256509780884, | |
| "learning_rate": 6.0282916213275295e-05, | |
| "loss": 1.4124, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 278, | |
| "tokens_per_second_per_gpu": 9.28 | |
| }, | |
| { | |
| "epoch": 0.30317848410757947, | |
| "grad_norm": 0.5082276463508606, | |
| "learning_rate": 6.0500544069640916e-05, | |
| "loss": 1.3069, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 279, | |
| "tokens_per_second_per_gpu": 10.71 | |
| }, | |
| { | |
| "epoch": 0.30426514534093996, | |
| "grad_norm": 0.32618454098701477, | |
| "learning_rate": 6.071817192600653e-05, | |
| "loss": 1.318, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 280, | |
| "tokens_per_second_per_gpu": 8.53 | |
| }, | |
| { | |
| "epoch": 0.30535180657430044, | |
| "grad_norm": 0.41613566875457764, | |
| "learning_rate": 6.093579978237215e-05, | |
| "loss": 1.4003, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 281, | |
| "tokens_per_second_per_gpu": 14.79 | |
| }, | |
| { | |
| "epoch": 0.306438467807661, | |
| "grad_norm": 0.3233785331249237, | |
| "learning_rate": 6.115342763873776e-05, | |
| "loss": 1.3504, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 282, | |
| "tokens_per_second_per_gpu": 10.18 | |
| }, | |
| { | |
| "epoch": 0.30752512904102147, | |
| "grad_norm": 0.4473751485347748, | |
| "learning_rate": 6.137105549510338e-05, | |
| "loss": 1.3966, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 283, | |
| "tokens_per_second_per_gpu": 9.81 | |
| }, | |
| { | |
| "epoch": 0.30861179027438196, | |
| "grad_norm": 0.29760703444480896, | |
| "learning_rate": 6.158868335146899e-05, | |
| "loss": 1.296, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 284, | |
| "tokens_per_second_per_gpu": 14.26 | |
| }, | |
| { | |
| "epoch": 0.30969845150774244, | |
| "grad_norm": 0.2307472825050354, | |
| "learning_rate": 6.18063112078346e-05, | |
| "loss": 1.2889, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 285, | |
| "tokens_per_second_per_gpu": 7.58 | |
| }, | |
| { | |
| "epoch": 0.310785112741103, | |
| "grad_norm": 0.5018793940544128, | |
| "learning_rate": 6.202393906420022e-05, | |
| "loss": 1.3509, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 286, | |
| "tokens_per_second_per_gpu": 6.92 | |
| }, | |
| { | |
| "epoch": 0.3118717739744635, | |
| "grad_norm": 0.317269891500473, | |
| "learning_rate": 6.224156692056583e-05, | |
| "loss": 1.3496, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 287, | |
| "tokens_per_second_per_gpu": 11.81 | |
| }, | |
| { | |
| "epoch": 0.31295843520782396, | |
| "grad_norm": 0.39457565546035767, | |
| "learning_rate": 6.245919477693145e-05, | |
| "loss": 1.3841, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 288, | |
| "tokens_per_second_per_gpu": 9.91 | |
| }, | |
| { | |
| "epoch": 0.31404509644118445, | |
| "grad_norm": 0.3295256495475769, | |
| "learning_rate": 6.267682263329706e-05, | |
| "loss": 1.4099, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 289, | |
| "tokens_per_second_per_gpu": 11.16 | |
| }, | |
| { | |
| "epoch": 0.315131757674545, | |
| "grad_norm": 0.3775258958339691, | |
| "learning_rate": 6.289445048966269e-05, | |
| "loss": 1.5358, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 290, | |
| "tokens_per_second_per_gpu": 4.24 | |
| }, | |
| { | |
| "epoch": 0.3162184189079055, | |
| "grad_norm": 0.24287149310112, | |
| "learning_rate": 6.311207834602829e-05, | |
| "loss": 1.3862, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 291, | |
| "tokens_per_second_per_gpu": 19.98 | |
| }, | |
| { | |
| "epoch": 0.31730508014126596, | |
| "grad_norm": 0.3070085942745209, | |
| "learning_rate": 6.332970620239392e-05, | |
| "loss": 1.3635, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 292, | |
| "tokens_per_second_per_gpu": 6.92 | |
| }, | |
| { | |
| "epoch": 0.31839174137462645, | |
| "grad_norm": 0.3488074839115143, | |
| "learning_rate": 6.354733405875952e-05, | |
| "loss": 1.2525, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 293, | |
| "tokens_per_second_per_gpu": 14.36 | |
| }, | |
| { | |
| "epoch": 0.31947840260798693, | |
| "grad_norm": 0.3388550579547882, | |
| "learning_rate": 6.376496191512514e-05, | |
| "loss": 1.373, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 294, | |
| "tokens_per_second_per_gpu": 13.73 | |
| }, | |
| { | |
| "epoch": 0.3205650638413475, | |
| "grad_norm": 0.3950432240962982, | |
| "learning_rate": 6.398258977149076e-05, | |
| "loss": 1.351, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 295, | |
| "tokens_per_second_per_gpu": 12.67 | |
| }, | |
| { | |
| "epoch": 0.32165172507470796, | |
| "grad_norm": 0.439014196395874, | |
| "learning_rate": 6.420021762785637e-05, | |
| "loss": 1.3355, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 296, | |
| "tokens_per_second_per_gpu": 15.01 | |
| }, | |
| { | |
| "epoch": 0.32273838630806845, | |
| "grad_norm": 0.2883545160293579, | |
| "learning_rate": 6.441784548422198e-05, | |
| "loss": 1.3526, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 297, | |
| "tokens_per_second_per_gpu": 13.36 | |
| }, | |
| { | |
| "epoch": 0.32382504754142893, | |
| "grad_norm": 0.34774455428123474, | |
| "learning_rate": 6.46354733405876e-05, | |
| "loss": 1.2912, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 298, | |
| "tokens_per_second_per_gpu": 6.8 | |
| }, | |
| { | |
| "epoch": 0.3249117087747895, | |
| "grad_norm": 0.3952115774154663, | |
| "learning_rate": 6.485310119695321e-05, | |
| "loss": 1.3897, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 299, | |
| "tokens_per_second_per_gpu": 8.15 | |
| }, | |
| { | |
| "epoch": 0.32599837000814996, | |
| "grad_norm": 0.38997477293014526, | |
| "learning_rate": 6.507072905331883e-05, | |
| "loss": 1.3292, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 300, | |
| "tokens_per_second_per_gpu": 21.64 | |
| }, | |
| { | |
| "epoch": 0.32708503124151045, | |
| "grad_norm": 0.26448020339012146, | |
| "learning_rate": 6.528835690968444e-05, | |
| "loss": 1.3333, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 301, | |
| "tokens_per_second_per_gpu": 14.36 | |
| }, | |
| { | |
| "epoch": 0.32817169247487094, | |
| "grad_norm": 0.30526965856552124, | |
| "learning_rate": 6.550598476605005e-05, | |
| "loss": 1.3987, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 302, | |
| "tokens_per_second_per_gpu": 14.03 | |
| }, | |
| { | |
| "epoch": 0.3292583537082315, | |
| "grad_norm": 0.3571244478225708, | |
| "learning_rate": 6.572361262241568e-05, | |
| "loss": 1.3885, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 303, | |
| "tokens_per_second_per_gpu": 9.27 | |
| }, | |
| { | |
| "epoch": 0.33034501494159196, | |
| "grad_norm": 0.26890137791633606, | |
| "learning_rate": 6.594124047878128e-05, | |
| "loss": 1.3313, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 304, | |
| "tokens_per_second_per_gpu": 9.44 | |
| }, | |
| { | |
| "epoch": 0.33143167617495245, | |
| "grad_norm": 0.5638411045074463, | |
| "learning_rate": 6.615886833514691e-05, | |
| "loss": 1.3728, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 305, | |
| "tokens_per_second_per_gpu": 3.91 | |
| }, | |
| { | |
| "epoch": 0.33251833740831294, | |
| "grad_norm": 0.31874948740005493, | |
| "learning_rate": 6.637649619151252e-05, | |
| "loss": 1.3647, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 306, | |
| "tokens_per_second_per_gpu": 11.06 | |
| }, | |
| { | |
| "epoch": 0.3336049986416735, | |
| "grad_norm": 0.35177457332611084, | |
| "learning_rate": 6.659412404787814e-05, | |
| "loss": 1.3633, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 307, | |
| "tokens_per_second_per_gpu": 6.22 | |
| }, | |
| { | |
| "epoch": 0.33469165987503396, | |
| "grad_norm": 0.37237054109573364, | |
| "learning_rate": 6.681175190424375e-05, | |
| "loss": 1.3305, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 308, | |
| "tokens_per_second_per_gpu": 11.35 | |
| }, | |
| { | |
| "epoch": 0.33577832110839445, | |
| "grad_norm": 0.33290013670921326, | |
| "learning_rate": 6.702937976060935e-05, | |
| "loss": 1.3155, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 309, | |
| "tokens_per_second_per_gpu": 8.39 | |
| }, | |
| { | |
| "epoch": 0.33686498234175494, | |
| "grad_norm": 0.3056808114051819, | |
| "learning_rate": 6.724700761697498e-05, | |
| "loss": 1.3555, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 310, | |
| "tokens_per_second_per_gpu": 15.45 | |
| }, | |
| { | |
| "epoch": 0.3379516435751155, | |
| "grad_norm": 0.38003915548324585, | |
| "learning_rate": 6.746463547334059e-05, | |
| "loss": 1.4225, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 311, | |
| "tokens_per_second_per_gpu": 12.66 | |
| }, | |
| { | |
| "epoch": 0.33903830480847597, | |
| "grad_norm": 0.3557988703250885, | |
| "learning_rate": 6.76822633297062e-05, | |
| "loss": 1.2969, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 312, | |
| "tokens_per_second_per_gpu": 20.36 | |
| }, | |
| { | |
| "epoch": 0.34012496604183645, | |
| "grad_norm": 0.38589081168174744, | |
| "learning_rate": 6.789989118607182e-05, | |
| "loss": 1.4157, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 313, | |
| "tokens_per_second_per_gpu": 8.63 | |
| }, | |
| { | |
| "epoch": 0.34121162727519694, | |
| "grad_norm": 0.3078034222126007, | |
| "learning_rate": 6.811751904243743e-05, | |
| "loss": 1.336, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 314, | |
| "tokens_per_second_per_gpu": 5.01 | |
| }, | |
| { | |
| "epoch": 0.3422982885085575, | |
| "grad_norm": 0.46178382635116577, | |
| "learning_rate": 6.833514689880305e-05, | |
| "loss": 1.4031, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 315, | |
| "tokens_per_second_per_gpu": 9.35 | |
| }, | |
| { | |
| "epoch": 0.34338494974191797, | |
| "grad_norm": 0.3118983209133148, | |
| "learning_rate": 6.855277475516867e-05, | |
| "loss": 1.4071, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 316, | |
| "tokens_per_second_per_gpu": 6.89 | |
| }, | |
| { | |
| "epoch": 0.34447161097527845, | |
| "grad_norm": 0.314553827047348, | |
| "learning_rate": 6.877040261153427e-05, | |
| "loss": 1.3786, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 317, | |
| "tokens_per_second_per_gpu": 7.95 | |
| }, | |
| { | |
| "epoch": 0.34555827220863894, | |
| "grad_norm": 0.33250656723976135, | |
| "learning_rate": 6.898803046789989e-05, | |
| "loss": 1.3884, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 318, | |
| "tokens_per_second_per_gpu": 19.28 | |
| }, | |
| { | |
| "epoch": 0.3466449334419995, | |
| "grad_norm": 0.3305460214614868, | |
| "learning_rate": 6.920565832426552e-05, | |
| "loss": 1.3223, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 319, | |
| "tokens_per_second_per_gpu": 12.68 | |
| }, | |
| { | |
| "epoch": 0.34773159467535997, | |
| "grad_norm": 0.28660210967063904, | |
| "learning_rate": 6.942328618063112e-05, | |
| "loss": 1.2589, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 320, | |
| "tokens_per_second_per_gpu": 12.26 | |
| }, | |
| { | |
| "epoch": 0.34881825590872045, | |
| "grad_norm": 0.34048280119895935, | |
| "learning_rate": 6.964091403699674e-05, | |
| "loss": 1.3913, | |
| "memory/device_reserved (GiB)": 76.46, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 321, | |
| "tokens_per_second_per_gpu": 8.45 | |
| }, | |
| { | |
| "epoch": 0.34990491714208094, | |
| "grad_norm": 0.3876027762889862, | |
| "learning_rate": 6.985854189336236e-05, | |
| "loss": 1.3746, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 322, | |
| "tokens_per_second_per_gpu": 16.03 | |
| }, | |
| { | |
| "epoch": 0.3509915783754415, | |
| "grad_norm": 0.5212762355804443, | |
| "learning_rate": 7.007616974972797e-05, | |
| "loss": 1.3509, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 323, | |
| "tokens_per_second_per_gpu": 4.05 | |
| }, | |
| { | |
| "epoch": 0.35207823960880197, | |
| "grad_norm": 0.39593666791915894, | |
| "learning_rate": 7.029379760609358e-05, | |
| "loss": 1.345, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 324, | |
| "tokens_per_second_per_gpu": 7.26 | |
| }, | |
| { | |
| "epoch": 0.35316490084216245, | |
| "grad_norm": 0.26743555068969727, | |
| "learning_rate": 7.05114254624592e-05, | |
| "loss": 1.3025, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 325, | |
| "tokens_per_second_per_gpu": 12.96 | |
| }, | |
| { | |
| "epoch": 0.35425156207552294, | |
| "grad_norm": 0.3313232958316803, | |
| "learning_rate": 7.072905331882481e-05, | |
| "loss": 1.3059, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 326, | |
| "tokens_per_second_per_gpu": 10.27 | |
| }, | |
| { | |
| "epoch": 0.3553382233088835, | |
| "grad_norm": 0.4047112762928009, | |
| "learning_rate": 7.094668117519043e-05, | |
| "loss": 1.2951, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 327, | |
| "tokens_per_second_per_gpu": 7.12 | |
| }, | |
| { | |
| "epoch": 0.35642488454224397, | |
| "grad_norm": 0.393057644367218, | |
| "learning_rate": 7.116430903155604e-05, | |
| "loss": 1.3892, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 328, | |
| "tokens_per_second_per_gpu": 12.7 | |
| }, | |
| { | |
| "epoch": 0.35751154577560446, | |
| "grad_norm": 0.31370872259140015, | |
| "learning_rate": 7.138193688792165e-05, | |
| "loss": 1.2839, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 329, | |
| "tokens_per_second_per_gpu": 9.5 | |
| }, | |
| { | |
| "epoch": 0.35859820700896494, | |
| "grad_norm": 0.36633893847465515, | |
| "learning_rate": 7.159956474428727e-05, | |
| "loss": 1.4089, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 330, | |
| "tokens_per_second_per_gpu": 11.04 | |
| }, | |
| { | |
| "epoch": 0.35968486824232543, | |
| "grad_norm": 0.35443341732025146, | |
| "learning_rate": 7.181719260065288e-05, | |
| "loss": 1.3675, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 331, | |
| "tokens_per_second_per_gpu": 15.33 | |
| }, | |
| { | |
| "epoch": 0.36077152947568597, | |
| "grad_norm": 0.3740675449371338, | |
| "learning_rate": 7.203482045701851e-05, | |
| "loss": 1.4137, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 332, | |
| "tokens_per_second_per_gpu": 8.03 | |
| }, | |
| { | |
| "epoch": 0.36185819070904646, | |
| "grad_norm": 0.586748480796814, | |
| "learning_rate": 7.225244831338411e-05, | |
| "loss": 1.3121, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 333, | |
| "tokens_per_second_per_gpu": 15.79 | |
| }, | |
| { | |
| "epoch": 0.36294485194240694, | |
| "grad_norm": 1.100968360900879, | |
| "learning_rate": 7.247007616974974e-05, | |
| "loss": 1.3187, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 334, | |
| "tokens_per_second_per_gpu": 6.97 | |
| }, | |
| { | |
| "epoch": 0.36403151317576743, | |
| "grad_norm": 0.3769768178462982, | |
| "learning_rate": 7.268770402611535e-05, | |
| "loss": 1.2977, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 335, | |
| "tokens_per_second_per_gpu": 8.59 | |
| }, | |
| { | |
| "epoch": 0.36511817440912797, | |
| "grad_norm": 0.4046242833137512, | |
| "learning_rate": 7.290533188248096e-05, | |
| "loss": 1.3625, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 336, | |
| "tokens_per_second_per_gpu": 4.73 | |
| }, | |
| { | |
| "epoch": 0.36620483564248846, | |
| "grad_norm": 0.4921051859855652, | |
| "learning_rate": 7.312295973884658e-05, | |
| "loss": 1.3709, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 337, | |
| "tokens_per_second_per_gpu": 5.54 | |
| }, | |
| { | |
| "epoch": 0.36729149687584894, | |
| "grad_norm": 0.32319122552871704, | |
| "learning_rate": 7.334058759521219e-05, | |
| "loss": 1.3046, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 338, | |
| "tokens_per_second_per_gpu": 10.32 | |
| }, | |
| { | |
| "epoch": 0.36837815810920943, | |
| "grad_norm": 0.4417561888694763, | |
| "learning_rate": 7.35582154515778e-05, | |
| "loss": 1.3156, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 339, | |
| "tokens_per_second_per_gpu": 10.4 | |
| }, | |
| { | |
| "epoch": 0.36946481934257, | |
| "grad_norm": 0.31876614689826965, | |
| "learning_rate": 7.377584330794342e-05, | |
| "loss": 1.3163, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 340, | |
| "tokens_per_second_per_gpu": 16.85 | |
| }, | |
| { | |
| "epoch": 0.37055148057593046, | |
| "grad_norm": 0.27336418628692627, | |
| "learning_rate": 7.399347116430903e-05, | |
| "loss": 1.3835, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 341, | |
| "tokens_per_second_per_gpu": 8.16 | |
| }, | |
| { | |
| "epoch": 0.37163814180929094, | |
| "grad_norm": 0.2615598142147064, | |
| "learning_rate": 7.421109902067465e-05, | |
| "loss": 1.384, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 342, | |
| "tokens_per_second_per_gpu": 10.96 | |
| }, | |
| { | |
| "epoch": 0.37272480304265143, | |
| "grad_norm": 0.491675466299057, | |
| "learning_rate": 7.442872687704026e-05, | |
| "loss": 1.3459, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 343, | |
| "tokens_per_second_per_gpu": 3.89 | |
| }, | |
| { | |
| "epoch": 0.373811464276012, | |
| "grad_norm": 0.39106589555740356, | |
| "learning_rate": 7.464635473340587e-05, | |
| "loss": 1.3546, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 344, | |
| "tokens_per_second_per_gpu": 16.28 | |
| }, | |
| { | |
| "epoch": 0.37489812550937246, | |
| "grad_norm": 0.3510796129703522, | |
| "learning_rate": 7.48639825897715e-05, | |
| "loss": 1.3337, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 345, | |
| "tokens_per_second_per_gpu": 14.87 | |
| }, | |
| { | |
| "epoch": 0.37598478674273295, | |
| "grad_norm": 0.30834734439849854, | |
| "learning_rate": 7.50816104461371e-05, | |
| "loss": 1.3162, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 346, | |
| "tokens_per_second_per_gpu": 5.58 | |
| }, | |
| { | |
| "epoch": 0.37707144797609343, | |
| "grad_norm": 0.42482101917266846, | |
| "learning_rate": 7.529923830250273e-05, | |
| "loss": 1.3224, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 347, | |
| "tokens_per_second_per_gpu": 18.84 | |
| }, | |
| { | |
| "epoch": 0.378158109209454, | |
| "grad_norm": 0.6320729851722717, | |
| "learning_rate": 7.551686615886834e-05, | |
| "loss": 1.3369, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 348, | |
| "tokens_per_second_per_gpu": 15.51 | |
| }, | |
| { | |
| "epoch": 0.37924477044281446, | |
| "grad_norm": 0.45338988304138184, | |
| "learning_rate": 7.573449401523396e-05, | |
| "loss": 1.3303, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 349, | |
| "tokens_per_second_per_gpu": 11.51 | |
| }, | |
| { | |
| "epoch": 0.38033143167617495, | |
| "grad_norm": 0.2993900179862976, | |
| "learning_rate": 7.595212187159957e-05, | |
| "loss": 1.3784, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 350, | |
| "tokens_per_second_per_gpu": 12.53 | |
| }, | |
| { | |
| "epoch": 0.38141809290953543, | |
| "grad_norm": 0.44423869252204895, | |
| "learning_rate": 7.616974972796518e-05, | |
| "loss": 1.3219, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 351, | |
| "tokens_per_second_per_gpu": 12.64 | |
| }, | |
| { | |
| "epoch": 0.382504754142896, | |
| "grad_norm": 0.38582709431648254, | |
| "learning_rate": 7.63873775843308e-05, | |
| "loss": 1.2591, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 352, | |
| "tokens_per_second_per_gpu": 16.1 | |
| }, | |
| { | |
| "epoch": 0.38359141537625646, | |
| "grad_norm": 0.3074321746826172, | |
| "learning_rate": 7.660500544069641e-05, | |
| "loss": 1.3865, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 353, | |
| "tokens_per_second_per_gpu": 17.55 | |
| }, | |
| { | |
| "epoch": 0.38467807660961695, | |
| "grad_norm": 0.2700411081314087, | |
| "learning_rate": 7.682263329706203e-05, | |
| "loss": 1.3647, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 354, | |
| "tokens_per_second_per_gpu": 15.07 | |
| }, | |
| { | |
| "epoch": 0.38576473784297743, | |
| "grad_norm": 0.35122451186180115, | |
| "learning_rate": 7.704026115342764e-05, | |
| "loss": 1.368, | |
| "memory/device_reserved (GiB)": 76.47, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 355, | |
| "tokens_per_second_per_gpu": 8.28 | |
| }, | |
| { | |
| "epoch": 0.386851399076338, | |
| "grad_norm": 0.5305274128913879, | |
| "learning_rate": 7.725788900979327e-05, | |
| "loss": 1.2905, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 356, | |
| "tokens_per_second_per_gpu": 11.31 | |
| }, | |
| { | |
| "epoch": 0.38793806030969846, | |
| "grad_norm": 0.3077031672000885, | |
| "learning_rate": 7.747551686615887e-05, | |
| "loss": 1.2523, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 357, | |
| "tokens_per_second_per_gpu": 2.67 | |
| }, | |
| { | |
| "epoch": 0.38902472154305895, | |
| "grad_norm": 0.39146187901496887, | |
| "learning_rate": 7.76931447225245e-05, | |
| "loss": 1.3954, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 358, | |
| "tokens_per_second_per_gpu": 14.08 | |
| }, | |
| { | |
| "epoch": 0.39011138277641944, | |
| "grad_norm": 0.2832060158252716, | |
| "learning_rate": 7.79107725788901e-05, | |
| "loss": 1.3354, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 359, | |
| "tokens_per_second_per_gpu": 18.37 | |
| }, | |
| { | |
| "epoch": 0.39119804400978, | |
| "grad_norm": 0.2699771821498871, | |
| "learning_rate": 7.812840043525572e-05, | |
| "loss": 1.3645, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 360, | |
| "tokens_per_second_per_gpu": 6.66 | |
| }, | |
| { | |
| "epoch": 0.39228470524314046, | |
| "grad_norm": 0.3361879289150238, | |
| "learning_rate": 7.834602829162134e-05, | |
| "loss": 1.3952, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 361, | |
| "tokens_per_second_per_gpu": 10.8 | |
| }, | |
| { | |
| "epoch": 0.39337136647650095, | |
| "grad_norm": 0.3688875436782837, | |
| "learning_rate": 7.856365614798694e-05, | |
| "loss": 1.3926, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 362, | |
| "tokens_per_second_per_gpu": 10.69 | |
| }, | |
| { | |
| "epoch": 0.39445802770986144, | |
| "grad_norm": 0.4953528940677643, | |
| "learning_rate": 7.878128400435256e-05, | |
| "loss": 1.3768, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 363, | |
| "tokens_per_second_per_gpu": 10.88 | |
| }, | |
| { | |
| "epoch": 0.395544688943222, | |
| "grad_norm": 0.30809733271598816, | |
| "learning_rate": 7.899891186071818e-05, | |
| "loss": 1.2944, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 364, | |
| "tokens_per_second_per_gpu": 13.12 | |
| }, | |
| { | |
| "epoch": 0.39663135017658246, | |
| "grad_norm": 0.36492279171943665, | |
| "learning_rate": 7.921653971708379e-05, | |
| "loss": 1.3065, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 365, | |
| "tokens_per_second_per_gpu": 7.73 | |
| }, | |
| { | |
| "epoch": 0.39771801140994295, | |
| "grad_norm": 0.3043460249900818, | |
| "learning_rate": 7.94341675734494e-05, | |
| "loss": 1.3067, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 366, | |
| "tokens_per_second_per_gpu": 12.88 | |
| }, | |
| { | |
| "epoch": 0.39880467264330344, | |
| "grad_norm": 0.38396111130714417, | |
| "learning_rate": 7.965179542981502e-05, | |
| "loss": 1.2807, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 367, | |
| "tokens_per_second_per_gpu": 4.49 | |
| }, | |
| { | |
| "epoch": 0.3998913338766639, | |
| "grad_norm": 0.2538345754146576, | |
| "learning_rate": 7.986942328618063e-05, | |
| "loss": 1.3443, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 368, | |
| "tokens_per_second_per_gpu": 5.37 | |
| }, | |
| { | |
| "epoch": 0.40097799511002447, | |
| "grad_norm": 0.3699328303337097, | |
| "learning_rate": 8.008705114254626e-05, | |
| "loss": 1.4054, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 369, | |
| "tokens_per_second_per_gpu": 10.97 | |
| }, | |
| { | |
| "epoch": 0.40206465634338495, | |
| "grad_norm": 0.28283485770225525, | |
| "learning_rate": 8.030467899891186e-05, | |
| "loss": 1.2713, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 370, | |
| "tokens_per_second_per_gpu": 10.36 | |
| }, | |
| { | |
| "epoch": 0.40315131757674544, | |
| "grad_norm": 0.4098984897136688, | |
| "learning_rate": 8.052230685527747e-05, | |
| "loss": 1.4294, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 371, | |
| "tokens_per_second_per_gpu": 14.54 | |
| }, | |
| { | |
| "epoch": 0.4042379788101059, | |
| "grad_norm": 0.29690855741500854, | |
| "learning_rate": 8.07399347116431e-05, | |
| "loss": 1.3647, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 372, | |
| "tokens_per_second_per_gpu": 6.0 | |
| }, | |
| { | |
| "epoch": 0.40532464004346647, | |
| "grad_norm": 0.3762664496898651, | |
| "learning_rate": 8.09575625680087e-05, | |
| "loss": 1.2658, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 373, | |
| "tokens_per_second_per_gpu": 8.51 | |
| }, | |
| { | |
| "epoch": 0.40641130127682695, | |
| "grad_norm": 0.2728351354598999, | |
| "learning_rate": 8.117519042437433e-05, | |
| "loss": 1.3444, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 374, | |
| "tokens_per_second_per_gpu": 10.12 | |
| }, | |
| { | |
| "epoch": 0.40749796251018744, | |
| "grad_norm": 0.29128357768058777, | |
| "learning_rate": 8.139281828073993e-05, | |
| "loss": 1.2666, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 375, | |
| "tokens_per_second_per_gpu": 7.18 | |
| }, | |
| { | |
| "epoch": 0.4085846237435479, | |
| "grad_norm": 0.31152746081352234, | |
| "learning_rate": 8.161044613710556e-05, | |
| "loss": 1.3502, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 376, | |
| "tokens_per_second_per_gpu": 10.59 | |
| }, | |
| { | |
| "epoch": 0.40967128497690847, | |
| "grad_norm": 0.39238440990448, | |
| "learning_rate": 8.182807399347117e-05, | |
| "loss": 1.3213, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 377, | |
| "tokens_per_second_per_gpu": 10.49 | |
| }, | |
| { | |
| "epoch": 0.41075794621026895, | |
| "grad_norm": 0.333661824464798, | |
| "learning_rate": 8.204570184983678e-05, | |
| "loss": 1.299, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 378, | |
| "tokens_per_second_per_gpu": 10.07 | |
| }, | |
| { | |
| "epoch": 0.41184460744362944, | |
| "grad_norm": 0.31787121295928955, | |
| "learning_rate": 8.22633297062024e-05, | |
| "loss": 1.337, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 379, | |
| "tokens_per_second_per_gpu": 11.17 | |
| }, | |
| { | |
| "epoch": 0.4129312686769899, | |
| "grad_norm": 0.34475699067115784, | |
| "learning_rate": 8.248095756256801e-05, | |
| "loss": 1.3123, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 380, | |
| "tokens_per_second_per_gpu": 4.43 | |
| }, | |
| { | |
| "epoch": 0.41401792991035047, | |
| "grad_norm": 0.4109145402908325, | |
| "learning_rate": 8.269858541893363e-05, | |
| "loss": 1.1906, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 381, | |
| "tokens_per_second_per_gpu": 7.97 | |
| }, | |
| { | |
| "epoch": 0.41510459114371095, | |
| "grad_norm": 0.4140326976776123, | |
| "learning_rate": 8.291621327529924e-05, | |
| "loss": 1.3475, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 382, | |
| "tokens_per_second_per_gpu": 12.15 | |
| }, | |
| { | |
| "epoch": 0.41619125237707144, | |
| "grad_norm": 11.361364364624023, | |
| "learning_rate": 8.313384113166485e-05, | |
| "loss": 1.2722, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 383, | |
| "tokens_per_second_per_gpu": 10.76 | |
| }, | |
| { | |
| "epoch": 0.4172779136104319, | |
| "grad_norm": 0.32308393716812134, | |
| "learning_rate": 8.335146898803047e-05, | |
| "loss": 1.2933, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 384, | |
| "tokens_per_second_per_gpu": 11.76 | |
| }, | |
| { | |
| "epoch": 0.41836457484379247, | |
| "grad_norm": 0.4235360622406006, | |
| "learning_rate": 8.35690968443961e-05, | |
| "loss": 1.3646, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 385, | |
| "tokens_per_second_per_gpu": 12.22 | |
| }, | |
| { | |
| "epoch": 0.41945123607715296, | |
| "grad_norm": 0.328868567943573, | |
| "learning_rate": 8.37867247007617e-05, | |
| "loss": 1.2836, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 386, | |
| "tokens_per_second_per_gpu": 8.51 | |
| }, | |
| { | |
| "epoch": 0.42053789731051344, | |
| "grad_norm": 0.2766740024089813, | |
| "learning_rate": 8.400435255712732e-05, | |
| "loss": 1.3224, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 387, | |
| "tokens_per_second_per_gpu": 22.63 | |
| }, | |
| { | |
| "epoch": 0.42162455854387393, | |
| "grad_norm": 0.24991992115974426, | |
| "learning_rate": 8.422198041349292e-05, | |
| "loss": 1.2672, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 388, | |
| "tokens_per_second_per_gpu": 16.93 | |
| }, | |
| { | |
| "epoch": 0.42271121977723447, | |
| "grad_norm": 0.3662981390953064, | |
| "learning_rate": 8.443960826985855e-05, | |
| "loss": 1.35, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 389, | |
| "tokens_per_second_per_gpu": 4.18 | |
| }, | |
| { | |
| "epoch": 0.42379788101059496, | |
| "grad_norm": 0.29548513889312744, | |
| "learning_rate": 8.465723612622416e-05, | |
| "loss": 1.3663, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 390, | |
| "tokens_per_second_per_gpu": 15.31 | |
| }, | |
| { | |
| "epoch": 0.42488454224395544, | |
| "grad_norm": 0.40705665946006775, | |
| "learning_rate": 8.487486398258978e-05, | |
| "loss": 1.2764, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 391, | |
| "tokens_per_second_per_gpu": 2.64 | |
| }, | |
| { | |
| "epoch": 0.42597120347731593, | |
| "grad_norm": 0.3255072832107544, | |
| "learning_rate": 8.509249183895539e-05, | |
| "loss": 1.3454, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 392, | |
| "tokens_per_second_per_gpu": 7.5 | |
| }, | |
| { | |
| "epoch": 0.42705786471067647, | |
| "grad_norm": 0.4000544846057892, | |
| "learning_rate": 8.5310119695321e-05, | |
| "loss": 1.3469, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 393, | |
| "tokens_per_second_per_gpu": 9.15 | |
| }, | |
| { | |
| "epoch": 0.42814452594403696, | |
| "grad_norm": 0.24011757969856262, | |
| "learning_rate": 8.552774755168662e-05, | |
| "loss": 1.3387, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 394, | |
| "tokens_per_second_per_gpu": 10.82 | |
| }, | |
| { | |
| "epoch": 0.42923118717739744, | |
| "grad_norm": 0.4035041630268097, | |
| "learning_rate": 8.574537540805223e-05, | |
| "loss": 1.3531, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 395, | |
| "tokens_per_second_per_gpu": 8.15 | |
| }, | |
| { | |
| "epoch": 0.43031784841075793, | |
| "grad_norm": 0.32465001940727234, | |
| "learning_rate": 8.596300326441785e-05, | |
| "loss": 1.3492, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 396, | |
| "tokens_per_second_per_gpu": 19.86 | |
| }, | |
| { | |
| "epoch": 0.4314045096441185, | |
| "grad_norm": 0.23745566606521606, | |
| "learning_rate": 8.618063112078346e-05, | |
| "loss": 1.3386, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 397, | |
| "tokens_per_second_per_gpu": 6.56 | |
| }, | |
| { | |
| "epoch": 0.43249117087747896, | |
| "grad_norm": 0.23796981573104858, | |
| "learning_rate": 8.639825897714909e-05, | |
| "loss": 1.3251, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 398, | |
| "tokens_per_second_per_gpu": 8.67 | |
| }, | |
| { | |
| "epoch": 0.43357783211083945, | |
| "grad_norm": 0.3548803925514221, | |
| "learning_rate": 8.661588683351469e-05, | |
| "loss": 1.3045, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 399, | |
| "tokens_per_second_per_gpu": 3.65 | |
| }, | |
| { | |
| "epoch": 0.43466449334419993, | |
| "grad_norm": 0.34694230556488037, | |
| "learning_rate": 8.683351468988032e-05, | |
| "loss": 1.2992, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 400, | |
| "tokens_per_second_per_gpu": 12.68 | |
| }, | |
| { | |
| "epoch": 0.4357511545775604, | |
| "grad_norm": 0.26728835701942444, | |
| "learning_rate": 8.705114254624593e-05, | |
| "loss": 1.3031, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 401, | |
| "tokens_per_second_per_gpu": 11.46 | |
| }, | |
| { | |
| "epoch": 0.43683781581092096, | |
| "grad_norm": 0.3429200053215027, | |
| "learning_rate": 8.726877040261154e-05, | |
| "loss": 1.3605, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 402, | |
| "tokens_per_second_per_gpu": 6.95 | |
| }, | |
| { | |
| "epoch": 0.43792447704428145, | |
| "grad_norm": 0.518604040145874, | |
| "learning_rate": 8.748639825897716e-05, | |
| "loss": 1.2519, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 403, | |
| "tokens_per_second_per_gpu": 9.69 | |
| }, | |
| { | |
| "epoch": 0.43901113827764193, | |
| "grad_norm": 0.3300992548465729, | |
| "learning_rate": 8.770402611534276e-05, | |
| "loss": 1.2594, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 404, | |
| "tokens_per_second_per_gpu": 12.57 | |
| }, | |
| { | |
| "epoch": 0.4400977995110024, | |
| "grad_norm": 0.3316726088523865, | |
| "learning_rate": 8.792165397170838e-05, | |
| "loss": 1.2645, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 405, | |
| "tokens_per_second_per_gpu": 14.21 | |
| }, | |
| { | |
| "epoch": 0.44118446074436296, | |
| "grad_norm": 0.4415532648563385, | |
| "learning_rate": 8.8139281828074e-05, | |
| "loss": 1.3876, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 406, | |
| "tokens_per_second_per_gpu": 8.55 | |
| }, | |
| { | |
| "epoch": 0.44227112197772345, | |
| "grad_norm": 0.407041996717453, | |
| "learning_rate": 8.835690968443961e-05, | |
| "loss": 1.3331, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 407, | |
| "tokens_per_second_per_gpu": 7.72 | |
| }, | |
| { | |
| "epoch": 0.44335778321108393, | |
| "grad_norm": 0.29101991653442383, | |
| "learning_rate": 8.857453754080523e-05, | |
| "loss": 1.3161, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 408, | |
| "tokens_per_second_per_gpu": 12.83 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 0.27169397473335266, | |
| "learning_rate": 8.879216539717084e-05, | |
| "loss": 1.3136, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 409, | |
| "tokens_per_second_per_gpu": 5.4 | |
| }, | |
| { | |
| "epoch": 0.44553110567780496, | |
| "grad_norm": 0.5777682065963745, | |
| "learning_rate": 8.900979325353645e-05, | |
| "loss": 1.4144, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 410, | |
| "tokens_per_second_per_gpu": 11.0 | |
| }, | |
| { | |
| "epoch": 0.44661776691116545, | |
| "grad_norm": 0.35588160157203674, | |
| "learning_rate": 8.922742110990208e-05, | |
| "loss": 1.4217, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 411, | |
| "tokens_per_second_per_gpu": 7.37 | |
| }, | |
| { | |
| "epoch": 0.44770442814452593, | |
| "grad_norm": 0.3968086242675781, | |
| "learning_rate": 8.944504896626768e-05, | |
| "loss": 1.3809, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 412, | |
| "tokens_per_second_per_gpu": 8.14 | |
| }, | |
| { | |
| "epoch": 0.4487910893778864, | |
| "grad_norm": 0.2539708912372589, | |
| "learning_rate": 8.96626768226333e-05, | |
| "loss": 1.2918, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 413, | |
| "tokens_per_second_per_gpu": 11.79 | |
| }, | |
| { | |
| "epoch": 0.44987775061124696, | |
| "grad_norm": 0.434427410364151, | |
| "learning_rate": 8.988030467899892e-05, | |
| "loss": 1.3148, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 414, | |
| "tokens_per_second_per_gpu": 8.13 | |
| }, | |
| { | |
| "epoch": 0.45096441184460745, | |
| "grad_norm": 0.37377288937568665, | |
| "learning_rate": 9.009793253536452e-05, | |
| "loss": 1.4088, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 415, | |
| "tokens_per_second_per_gpu": 11.43 | |
| }, | |
| { | |
| "epoch": 0.45205107307796794, | |
| "grad_norm": 0.350955605506897, | |
| "learning_rate": 9.031556039173015e-05, | |
| "loss": 1.3314, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 416, | |
| "tokens_per_second_per_gpu": 8.76 | |
| }, | |
| { | |
| "epoch": 0.4531377343113284, | |
| "grad_norm": 0.27260836958885193, | |
| "learning_rate": 9.053318824809576e-05, | |
| "loss": 1.3742, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 417, | |
| "tokens_per_second_per_gpu": 7.21 | |
| }, | |
| { | |
| "epoch": 0.45422439554468896, | |
| "grad_norm": 0.49745768308639526, | |
| "learning_rate": 9.075081610446138e-05, | |
| "loss": 1.2976, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 418, | |
| "tokens_per_second_per_gpu": 4.45 | |
| }, | |
| { | |
| "epoch": 0.45531105677804945, | |
| "grad_norm": 0.3883240222930908, | |
| "learning_rate": 9.096844396082699e-05, | |
| "loss": 1.4907, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 419, | |
| "tokens_per_second_per_gpu": 7.1 | |
| }, | |
| { | |
| "epoch": 0.45639771801140994, | |
| "grad_norm": 0.29086917638778687, | |
| "learning_rate": 9.11860718171926e-05, | |
| "loss": 1.2976, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 420, | |
| "tokens_per_second_per_gpu": 10.63 | |
| }, | |
| { | |
| "epoch": 0.4574843792447704, | |
| "grad_norm": 0.2820741832256317, | |
| "learning_rate": 9.140369967355822e-05, | |
| "loss": 1.3603, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 421, | |
| "tokens_per_second_per_gpu": 8.32 | |
| }, | |
| { | |
| "epoch": 0.45857104047813096, | |
| "grad_norm": 0.24028311669826508, | |
| "learning_rate": 9.162132752992385e-05, | |
| "loss": 1.2872, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 422, | |
| "tokens_per_second_per_gpu": 5.67 | |
| }, | |
| { | |
| "epoch": 0.45965770171149145, | |
| "grad_norm": 0.3218623101711273, | |
| "learning_rate": 9.183895538628945e-05, | |
| "loss": 1.3215, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 423, | |
| "tokens_per_second_per_gpu": 16.73 | |
| }, | |
| { | |
| "epoch": 0.46074436294485194, | |
| "grad_norm": 0.29078397154808044, | |
| "learning_rate": 9.205658324265506e-05, | |
| "loss": 1.333, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 424, | |
| "tokens_per_second_per_gpu": 8.22 | |
| }, | |
| { | |
| "epoch": 0.4618310241782124, | |
| "grad_norm": 0.32058653235435486, | |
| "learning_rate": 9.227421109902067e-05, | |
| "loss": 1.3885, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 425, | |
| "tokens_per_second_per_gpu": 7.94 | |
| }, | |
| { | |
| "epoch": 0.46291768541157297, | |
| "grad_norm": 0.35527729988098145, | |
| "learning_rate": 9.249183895538629e-05, | |
| "loss": 1.3978, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 426, | |
| "tokens_per_second_per_gpu": 8.62 | |
| }, | |
| { | |
| "epoch": 0.46400434664493345, | |
| "grad_norm": 0.2507789433002472, | |
| "learning_rate": 9.270946681175192e-05, | |
| "loss": 1.2906, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 427, | |
| "tokens_per_second_per_gpu": 11.75 | |
| }, | |
| { | |
| "epoch": 0.46509100787829394, | |
| "grad_norm": 0.30308225750923157, | |
| "learning_rate": 9.292709466811752e-05, | |
| "loss": 1.2684, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 428, | |
| "tokens_per_second_per_gpu": 13.36 | |
| }, | |
| { | |
| "epoch": 0.4661776691116544, | |
| "grad_norm": 0.39585697650909424, | |
| "learning_rate": 9.314472252448314e-05, | |
| "loss": 1.3004, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 429, | |
| "tokens_per_second_per_gpu": 11.38 | |
| }, | |
| { | |
| "epoch": 0.46726433034501497, | |
| "grad_norm": 0.5487492680549622, | |
| "learning_rate": 9.336235038084876e-05, | |
| "loss": 1.3106, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 430, | |
| "tokens_per_second_per_gpu": 4.27 | |
| }, | |
| { | |
| "epoch": 0.46835099157837545, | |
| "grad_norm": 0.2982177734375, | |
| "learning_rate": 9.357997823721437e-05, | |
| "loss": 1.3277, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 431, | |
| "tokens_per_second_per_gpu": 7.12 | |
| }, | |
| { | |
| "epoch": 0.46943765281173594, | |
| "grad_norm": 0.36442676186561584, | |
| "learning_rate": 9.379760609357998e-05, | |
| "loss": 1.2501, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 432, | |
| "tokens_per_second_per_gpu": 12.4 | |
| }, | |
| { | |
| "epoch": 0.4705243140450964, | |
| "grad_norm": 0.35135430097579956, | |
| "learning_rate": 9.40152339499456e-05, | |
| "loss": 1.2797, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 433, | |
| "tokens_per_second_per_gpu": 9.31 | |
| }, | |
| { | |
| "epoch": 0.47161097527845697, | |
| "grad_norm": 0.4192192256450653, | |
| "learning_rate": 9.423286180631121e-05, | |
| "loss": 1.3394, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 434, | |
| "tokens_per_second_per_gpu": 4.91 | |
| }, | |
| { | |
| "epoch": 0.47269763651181745, | |
| "grad_norm": 0.29120954871177673, | |
| "learning_rate": 9.445048966267683e-05, | |
| "loss": 1.2893, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 435, | |
| "tokens_per_second_per_gpu": 14.84 | |
| }, | |
| { | |
| "epoch": 0.47378429774517794, | |
| "grad_norm": 0.33978521823883057, | |
| "learning_rate": 9.466811751904244e-05, | |
| "loss": 1.2134, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 436, | |
| "tokens_per_second_per_gpu": 21.42 | |
| }, | |
| { | |
| "epoch": 0.4748709589785384, | |
| "grad_norm": 0.4215603768825531, | |
| "learning_rate": 9.488574537540805e-05, | |
| "loss": 1.2817, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 437, | |
| "tokens_per_second_per_gpu": 13.59 | |
| }, | |
| { | |
| "epoch": 0.4759576202118989, | |
| "grad_norm": 0.28041815757751465, | |
| "learning_rate": 9.510337323177367e-05, | |
| "loss": 1.31, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 438, | |
| "tokens_per_second_per_gpu": 10.09 | |
| }, | |
| { | |
| "epoch": 0.47704428144525945, | |
| "grad_norm": 0.42334699630737305, | |
| "learning_rate": 9.532100108813928e-05, | |
| "loss": 1.4502, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 439, | |
| "tokens_per_second_per_gpu": 8.22 | |
| }, | |
| { | |
| "epoch": 0.47813094267861994, | |
| "grad_norm": 0.4922485947608948, | |
| "learning_rate": 9.553862894450491e-05, | |
| "loss": 1.2448, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 440, | |
| "tokens_per_second_per_gpu": 13.82 | |
| }, | |
| { | |
| "epoch": 0.4792176039119804, | |
| "grad_norm": 0.3534889221191406, | |
| "learning_rate": 9.575625680087051e-05, | |
| "loss": 1.4274, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 441, | |
| "tokens_per_second_per_gpu": 8.09 | |
| }, | |
| { | |
| "epoch": 0.4803042651453409, | |
| "grad_norm": 0.32689327001571655, | |
| "learning_rate": 9.597388465723614e-05, | |
| "loss": 1.3049, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 442, | |
| "tokens_per_second_per_gpu": 8.39 | |
| }, | |
| { | |
| "epoch": 0.48139092637870146, | |
| "grad_norm": 0.3258620798587799, | |
| "learning_rate": 9.619151251360175e-05, | |
| "loss": 1.5079, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 443, | |
| "tokens_per_second_per_gpu": 10.98 | |
| }, | |
| { | |
| "epoch": 0.48247758761206194, | |
| "grad_norm": 0.2975975573062897, | |
| "learning_rate": 9.640914036996736e-05, | |
| "loss": 1.329, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 444, | |
| "tokens_per_second_per_gpu": 17.64 | |
| }, | |
| { | |
| "epoch": 0.48356424884542243, | |
| "grad_norm": 0.4096674919128418, | |
| "learning_rate": 9.662676822633298e-05, | |
| "loss": 1.3798, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 445, | |
| "tokens_per_second_per_gpu": 3.97 | |
| }, | |
| { | |
| "epoch": 0.4846509100787829, | |
| "grad_norm": 0.4106085002422333, | |
| "learning_rate": 9.684439608269859e-05, | |
| "loss": 1.3706, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 446, | |
| "tokens_per_second_per_gpu": 8.34 | |
| }, | |
| { | |
| "epoch": 0.48573757131214346, | |
| "grad_norm": 0.3184206187725067, | |
| "learning_rate": 9.70620239390642e-05, | |
| "loss": 1.3194, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 447, | |
| "tokens_per_second_per_gpu": 5.3 | |
| }, | |
| { | |
| "epoch": 0.48682423254550394, | |
| "grad_norm": 0.2441464215517044, | |
| "learning_rate": 9.727965179542982e-05, | |
| "loss": 1.2765, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 448, | |
| "tokens_per_second_per_gpu": 14.94 | |
| }, | |
| { | |
| "epoch": 0.48791089377886443, | |
| "grad_norm": 0.33418238162994385, | |
| "learning_rate": 9.749727965179543e-05, | |
| "loss": 1.2979, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 449, | |
| "tokens_per_second_per_gpu": 6.32 | |
| }, | |
| { | |
| "epoch": 0.4889975550122249, | |
| "grad_norm": 0.40633317828178406, | |
| "learning_rate": 9.771490750816105e-05, | |
| "loss": 1.2871, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 450, | |
| "tokens_per_second_per_gpu": 10.03 | |
| }, | |
| { | |
| "epoch": 0.49008421624558546, | |
| "grad_norm": 0.22635316848754883, | |
| "learning_rate": 9.793253536452667e-05, | |
| "loss": 1.3548, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 451, | |
| "tokens_per_second_per_gpu": 11.36 | |
| }, | |
| { | |
| "epoch": 0.49117087747894594, | |
| "grad_norm": 0.2799948751926422, | |
| "learning_rate": 9.815016322089227e-05, | |
| "loss": 1.39, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 452, | |
| "tokens_per_second_per_gpu": 8.6 | |
| }, | |
| { | |
| "epoch": 0.49225753871230643, | |
| "grad_norm": 0.38461124897003174, | |
| "learning_rate": 9.83677910772579e-05, | |
| "loss": 1.252, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 453, | |
| "tokens_per_second_per_gpu": 7.3 | |
| }, | |
| { | |
| "epoch": 0.4933441999456669, | |
| "grad_norm": 0.36640533804893494, | |
| "learning_rate": 9.85854189336235e-05, | |
| "loss": 1.3281, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 454, | |
| "tokens_per_second_per_gpu": 9.01 | |
| }, | |
| { | |
| "epoch": 0.49443086117902746, | |
| "grad_norm": 0.24314464628696442, | |
| "learning_rate": 9.880304678998913e-05, | |
| "loss": 1.2171, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.56, | |
| "memory/max_allocated (GiB)": 72.56, | |
| "step": 455, | |
| "tokens_per_second_per_gpu": 17.3 | |
| }, | |
| { | |
| "epoch": 0.49551752241238795, | |
| "grad_norm": 0.30634331703186035, | |
| "learning_rate": 9.902067464635474e-05, | |
| "loss": 1.3218, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 456, | |
| "tokens_per_second_per_gpu": 6.25 | |
| }, | |
| { | |
| "epoch": 0.49660418364574843, | |
| "grad_norm": 0.31617459654808044, | |
| "learning_rate": 9.923830250272034e-05, | |
| "loss": 1.3088, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 457, | |
| "tokens_per_second_per_gpu": 24.48 | |
| }, | |
| { | |
| "epoch": 0.4976908448791089, | |
| "grad_norm": 0.3126237392425537, | |
| "learning_rate": 9.945593035908597e-05, | |
| "loss": 1.3127, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 458, | |
| "tokens_per_second_per_gpu": 5.2 | |
| }, | |
| { | |
| "epoch": 0.49877750611246946, | |
| "grad_norm": 0.3157085180282593, | |
| "learning_rate": 9.967355821545158e-05, | |
| "loss": 1.3164, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 459, | |
| "tokens_per_second_per_gpu": 20.17 | |
| }, | |
| { | |
| "epoch": 0.49986416734582995, | |
| "grad_norm": 0.26757919788360596, | |
| "learning_rate": 9.98911860718172e-05, | |
| "loss": 1.3107, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 460, | |
| "tokens_per_second_per_gpu": 11.03 | |
| }, | |
| { | |
| "epoch": 0.49986416734582995, | |
| "eval_loss": 1.3248014450073242, | |
| "eval_runtime": 2589.4973, | |
| "eval_samples_per_second": 0.579, | |
| "eval_steps_per_second": 0.145, | |
| "memory/device_reserved (GiB)": 76.49, | |
| "memory/max_active (GiB)": 58.27, | |
| "memory/max_allocated (GiB)": 58.27, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5009508285791905, | |
| "grad_norm": 0.38552331924438477, | |
| "learning_rate": 0.00010010881392818281, | |
| "loss": 1.2846, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 461, | |
| "tokens_per_second_per_gpu": 5.74 | |
| }, | |
| { | |
| "epoch": 0.5020374898125509, | |
| "grad_norm": 0.2443004995584488, | |
| "learning_rate": 0.00010032644178454843, | |
| "loss": 1.3173, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 462, | |
| "tokens_per_second_per_gpu": 6.0 | |
| }, | |
| { | |
| "epoch": 0.5031241510459115, | |
| "grad_norm": 0.3084656298160553, | |
| "learning_rate": 0.00010054406964091405, | |
| "loss": 1.4319, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 463, | |
| "tokens_per_second_per_gpu": 10.89 | |
| }, | |
| { | |
| "epoch": 0.5042108122792719, | |
| "grad_norm": 0.3311648666858673, | |
| "learning_rate": 0.00010076169749727967, | |
| "loss": 1.3346, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 464, | |
| "tokens_per_second_per_gpu": 7.76 | |
| }, | |
| { | |
| "epoch": 0.5052974735126324, | |
| "grad_norm": 0.23320050537586212, | |
| "learning_rate": 0.00010097932535364527, | |
| "loss": 1.2367, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 465, | |
| "tokens_per_second_per_gpu": 5.35 | |
| }, | |
| { | |
| "epoch": 0.506384134745993, | |
| "grad_norm": 0.28411737084388733, | |
| "learning_rate": 0.00010119695321001088, | |
| "loss": 1.3087, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 466, | |
| "tokens_per_second_per_gpu": 14.89 | |
| }, | |
| { | |
| "epoch": 0.5074707959793534, | |
| "grad_norm": 0.3009752035140991, | |
| "learning_rate": 0.0001014145810663765, | |
| "loss": 1.3312, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 467, | |
| "tokens_per_second_per_gpu": 10.22 | |
| }, | |
| { | |
| "epoch": 0.508557457212714, | |
| "grad_norm": 0.38802680373191833, | |
| "learning_rate": 0.00010163220892274212, | |
| "loss": 1.3468, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 468, | |
| "tokens_per_second_per_gpu": 7.39 | |
| }, | |
| { | |
| "epoch": 0.5096441184460745, | |
| "grad_norm": 0.4274388551712036, | |
| "learning_rate": 0.00010184983677910774, | |
| "loss": 1.3187, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 469, | |
| "tokens_per_second_per_gpu": 3.9 | |
| }, | |
| { | |
| "epoch": 0.5107307796794349, | |
| "grad_norm": 0.34262993931770325, | |
| "learning_rate": 0.00010206746463547334, | |
| "loss": 1.3601, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 470, | |
| "tokens_per_second_per_gpu": 5.05 | |
| }, | |
| { | |
| "epoch": 0.5118174409127955, | |
| "grad_norm": 0.2815151512622833, | |
| "learning_rate": 0.00010228509249183895, | |
| "loss": 1.2668, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 471, | |
| "tokens_per_second_per_gpu": 5.58 | |
| }, | |
| { | |
| "epoch": 0.5129041021461559, | |
| "grad_norm": 0.27684006094932556, | |
| "learning_rate": 0.00010250272034820459, | |
| "loss": 1.2927, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 472, | |
| "tokens_per_second_per_gpu": 10.28 | |
| }, | |
| { | |
| "epoch": 0.5139907633795164, | |
| "grad_norm": 0.4730660915374756, | |
| "learning_rate": 0.00010272034820457019, | |
| "loss": 1.4083, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 473, | |
| "tokens_per_second_per_gpu": 6.62 | |
| }, | |
| { | |
| "epoch": 0.515077424612877, | |
| "grad_norm": 0.2745938301086426, | |
| "learning_rate": 0.0001029379760609358, | |
| "loss": 1.433, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 474, | |
| "tokens_per_second_per_gpu": 5.67 | |
| }, | |
| { | |
| "epoch": 0.5161640858462374, | |
| "grad_norm": 0.3664637506008148, | |
| "learning_rate": 0.00010315560391730142, | |
| "loss": 1.3592, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 475, | |
| "tokens_per_second_per_gpu": 7.62 | |
| }, | |
| { | |
| "epoch": 0.517250747079598, | |
| "grad_norm": 0.23275315761566162, | |
| "learning_rate": 0.00010337323177366705, | |
| "loss": 1.3168, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 476, | |
| "tokens_per_second_per_gpu": 6.9 | |
| }, | |
| { | |
| "epoch": 0.5183374083129584, | |
| "grad_norm": 0.4999582767486572, | |
| "learning_rate": 0.00010359085963003266, | |
| "loss": 1.3411, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 477, | |
| "tokens_per_second_per_gpu": 7.92 | |
| }, | |
| { | |
| "epoch": 0.5194240695463189, | |
| "grad_norm": 0.28805461525917053, | |
| "learning_rate": 0.00010380848748639826, | |
| "loss": 1.2553, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 478, | |
| "tokens_per_second_per_gpu": 5.61 | |
| }, | |
| { | |
| "epoch": 0.5205107307796795, | |
| "grad_norm": 0.18844760954380035, | |
| "learning_rate": 0.00010402611534276387, | |
| "loss": 1.4331, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 479, | |
| "tokens_per_second_per_gpu": 8.61 | |
| }, | |
| { | |
| "epoch": 0.5215973920130399, | |
| "grad_norm": 0.24722449481487274, | |
| "learning_rate": 0.00010424374319912949, | |
| "loss": 1.3254, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 480, | |
| "tokens_per_second_per_gpu": 14.19 | |
| }, | |
| { | |
| "epoch": 0.5226840532464004, | |
| "grad_norm": 0.35248225927352905, | |
| "learning_rate": 0.00010446137105549511, | |
| "loss": 1.3779, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 481, | |
| "tokens_per_second_per_gpu": 13.32 | |
| }, | |
| { | |
| "epoch": 0.523770714479761, | |
| "grad_norm": 0.2907487154006958, | |
| "learning_rate": 0.00010467899891186073, | |
| "loss": 1.3434, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 482, | |
| "tokens_per_second_per_gpu": 11.38 | |
| }, | |
| { | |
| "epoch": 0.5248573757131214, | |
| "grad_norm": 0.3520384728908539, | |
| "learning_rate": 0.00010489662676822634, | |
| "loss": 1.3544, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 483, | |
| "tokens_per_second_per_gpu": 3.99 | |
| }, | |
| { | |
| "epoch": 0.525944036946482, | |
| "grad_norm": 0.227678120136261, | |
| "learning_rate": 0.00010511425462459194, | |
| "loss": 1.3704, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 484, | |
| "tokens_per_second_per_gpu": 11.38 | |
| }, | |
| { | |
| "epoch": 0.5270306981798424, | |
| "grad_norm": 0.3892858922481537, | |
| "learning_rate": 0.00010533188248095758, | |
| "loss": 1.2924, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 485, | |
| "tokens_per_second_per_gpu": 7.89 | |
| }, | |
| { | |
| "epoch": 0.5281173594132029, | |
| "grad_norm": 0.2113732397556305, | |
| "learning_rate": 0.00010554951033732318, | |
| "loss": 1.3218, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 486, | |
| "tokens_per_second_per_gpu": 7.49 | |
| }, | |
| { | |
| "epoch": 0.5292040206465635, | |
| "grad_norm": 0.2941826581954956, | |
| "learning_rate": 0.0001057671381936888, | |
| "loss": 1.2488, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 487, | |
| "tokens_per_second_per_gpu": 5.93 | |
| }, | |
| { | |
| "epoch": 0.5302906818799239, | |
| "grad_norm": 0.28975728154182434, | |
| "learning_rate": 0.00010598476605005441, | |
| "loss": 1.3089, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 488, | |
| "tokens_per_second_per_gpu": 16.55 | |
| }, | |
| { | |
| "epoch": 0.5313773431132844, | |
| "grad_norm": 0.20328691601753235, | |
| "learning_rate": 0.00010620239390642001, | |
| "loss": 1.2526, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 489, | |
| "tokens_per_second_per_gpu": 9.82 | |
| }, | |
| { | |
| "epoch": 0.532464004346645, | |
| "grad_norm": 0.25590065121650696, | |
| "learning_rate": 0.00010642002176278565, | |
| "loss": 1.371, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 490, | |
| "tokens_per_second_per_gpu": 5.27 | |
| }, | |
| { | |
| "epoch": 0.5335506655800054, | |
| "grad_norm": 0.26725253462791443, | |
| "learning_rate": 0.00010663764961915125, | |
| "loss": 1.3634, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 491, | |
| "tokens_per_second_per_gpu": 6.66 | |
| }, | |
| { | |
| "epoch": 0.534637326813366, | |
| "grad_norm": 0.45550161600112915, | |
| "learning_rate": 0.00010685527747551687, | |
| "loss": 1.2415, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 492, | |
| "tokens_per_second_per_gpu": 7.67 | |
| }, | |
| { | |
| "epoch": 0.5357239880467264, | |
| "grad_norm": 0.2559080719947815, | |
| "learning_rate": 0.00010707290533188248, | |
| "loss": 1.2822, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 493, | |
| "tokens_per_second_per_gpu": 9.99 | |
| }, | |
| { | |
| "epoch": 0.5368106492800869, | |
| "grad_norm": 0.30908021330833435, | |
| "learning_rate": 0.00010729053318824811, | |
| "loss": 1.2925, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 494, | |
| "tokens_per_second_per_gpu": 7.87 | |
| }, | |
| { | |
| "epoch": 0.5378973105134475, | |
| "grad_norm": 0.2990354001522064, | |
| "learning_rate": 0.00010750816104461372, | |
| "loss": 1.2959, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 495, | |
| "tokens_per_second_per_gpu": 10.95 | |
| }, | |
| { | |
| "epoch": 0.5389839717468079, | |
| "grad_norm": 0.23019470274448395, | |
| "learning_rate": 0.00010772578890097934, | |
| "loss": 1.3756, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 496, | |
| "tokens_per_second_per_gpu": 5.38 | |
| }, | |
| { | |
| "epoch": 0.5400706329801684, | |
| "grad_norm": 0.22143608331680298, | |
| "learning_rate": 0.00010794341675734494, | |
| "loss": 1.2849, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 497, | |
| "tokens_per_second_per_gpu": 6.49 | |
| }, | |
| { | |
| "epoch": 0.541157294213529, | |
| "grad_norm": 0.29254642128944397, | |
| "learning_rate": 0.00010816104461371055, | |
| "loss": 1.4031, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 498, | |
| "tokens_per_second_per_gpu": 11.27 | |
| }, | |
| { | |
| "epoch": 0.5422439554468894, | |
| "grad_norm": 0.28323113918304443, | |
| "learning_rate": 0.00010837867247007618, | |
| "loss": 1.2259, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 499, | |
| "tokens_per_second_per_gpu": 13.29 | |
| }, | |
| { | |
| "epoch": 0.54333061668025, | |
| "grad_norm": 0.4100179374217987, | |
| "learning_rate": 0.00010859630032644179, | |
| "loss": 1.3404, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 500, | |
| "tokens_per_second_per_gpu": 3.64 | |
| }, | |
| { | |
| "epoch": 0.5444172779136104, | |
| "grad_norm": 0.2791884243488312, | |
| "learning_rate": 0.0001088139281828074, | |
| "loss": 1.3569, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 501, | |
| "tokens_per_second_per_gpu": 8.29 | |
| }, | |
| { | |
| "epoch": 0.5455039391469709, | |
| "grad_norm": 0.36715173721313477, | |
| "learning_rate": 0.000109031556039173, | |
| "loss": 1.3005, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 502, | |
| "tokens_per_second_per_gpu": 10.94 | |
| }, | |
| { | |
| "epoch": 0.5465906003803315, | |
| "grad_norm": 0.37356871366500854, | |
| "learning_rate": 0.00010924918389553865, | |
| "loss": 1.2916, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 503, | |
| "tokens_per_second_per_gpu": 6.98 | |
| }, | |
| { | |
| "epoch": 0.5476772616136919, | |
| "grad_norm": 0.32009172439575195, | |
| "learning_rate": 0.00010946681175190425, | |
| "loss": 1.3546, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 504, | |
| "tokens_per_second_per_gpu": 8.25 | |
| }, | |
| { | |
| "epoch": 0.5487639228470524, | |
| "grad_norm": 0.3384503126144409, | |
| "learning_rate": 0.00010968443960826986, | |
| "loss": 1.3705, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 505, | |
| "tokens_per_second_per_gpu": 12.22 | |
| }, | |
| { | |
| "epoch": 0.549850584080413, | |
| "grad_norm": 0.25582775473594666, | |
| "learning_rate": 0.00010990206746463547, | |
| "loss": 1.2996, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 506, | |
| "tokens_per_second_per_gpu": 6.66 | |
| }, | |
| { | |
| "epoch": 0.5509372453137734, | |
| "grad_norm": 0.24928079545497894, | |
| "learning_rate": 0.0001101196953210011, | |
| "loss": 1.3825, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 507, | |
| "tokens_per_second_per_gpu": 8.01 | |
| }, | |
| { | |
| "epoch": 0.552023906547134, | |
| "grad_norm": 0.3717151880264282, | |
| "learning_rate": 0.00011033732317736671, | |
| "loss": 1.2864, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 508, | |
| "tokens_per_second_per_gpu": 7.28 | |
| }, | |
| { | |
| "epoch": 0.5531105677804944, | |
| "grad_norm": 0.2580597698688507, | |
| "learning_rate": 0.00011055495103373233, | |
| "loss": 1.2292, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 509, | |
| "tokens_per_second_per_gpu": 11.93 | |
| }, | |
| { | |
| "epoch": 0.5541972290138549, | |
| "grad_norm": 0.26424112915992737, | |
| "learning_rate": 0.00011077257889009793, | |
| "loss": 1.1711, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 510, | |
| "tokens_per_second_per_gpu": 12.46 | |
| }, | |
| { | |
| "epoch": 0.5552838902472155, | |
| "grad_norm": 0.3436356782913208, | |
| "learning_rate": 0.00011099020674646354, | |
| "loss": 1.1875, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 511, | |
| "tokens_per_second_per_gpu": 7.67 | |
| }, | |
| { | |
| "epoch": 0.5563705514805759, | |
| "grad_norm": 0.2892463505268097, | |
| "learning_rate": 0.00011120783460282917, | |
| "loss": 1.2819, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 512, | |
| "tokens_per_second_per_gpu": 10.93 | |
| }, | |
| { | |
| "epoch": 0.5574572127139364, | |
| "grad_norm": 0.28604984283447266, | |
| "learning_rate": 0.00011142546245919478, | |
| "loss": 1.3623, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 513, | |
| "tokens_per_second_per_gpu": 4.85 | |
| }, | |
| { | |
| "epoch": 0.5585438739472969, | |
| "grad_norm": 0.2967474162578583, | |
| "learning_rate": 0.0001116430903155604, | |
| "loss": 1.4183, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 514, | |
| "tokens_per_second_per_gpu": 5.93 | |
| }, | |
| { | |
| "epoch": 0.5596305351806574, | |
| "grad_norm": 0.2249719500541687, | |
| "learning_rate": 0.000111860718171926, | |
| "loss": 1.3126, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 515, | |
| "tokens_per_second_per_gpu": 9.05 | |
| }, | |
| { | |
| "epoch": 0.560717196414018, | |
| "grad_norm": 0.313816636800766, | |
| "learning_rate": 0.00011207834602829164, | |
| "loss": 1.3466, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 516, | |
| "tokens_per_second_per_gpu": 14.09 | |
| }, | |
| { | |
| "epoch": 0.5618038576473784, | |
| "grad_norm": 0.3536757826805115, | |
| "learning_rate": 0.00011229597388465725, | |
| "loss": 1.3539, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 517, | |
| "tokens_per_second_per_gpu": 2.6 | |
| }, | |
| { | |
| "epoch": 0.5628905188807389, | |
| "grad_norm": 0.3303036391735077, | |
| "learning_rate": 0.00011251360174102285, | |
| "loss": 1.3216, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 518, | |
| "tokens_per_second_per_gpu": 5.73 | |
| }, | |
| { | |
| "epoch": 0.5639771801140995, | |
| "grad_norm": 0.8057336807250977, | |
| "learning_rate": 0.00011273122959738847, | |
| "loss": 1.3524, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 519, | |
| "tokens_per_second_per_gpu": 9.34 | |
| }, | |
| { | |
| "epoch": 0.5650638413474599, | |
| "grad_norm": 0.32165050506591797, | |
| "learning_rate": 0.00011294885745375408, | |
| "loss": 1.2361, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 520, | |
| "tokens_per_second_per_gpu": 15.07 | |
| }, | |
| { | |
| "epoch": 0.5661505025808204, | |
| "grad_norm": 0.3340268135070801, | |
| "learning_rate": 0.00011316648531011971, | |
| "loss": 1.2522, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 521, | |
| "tokens_per_second_per_gpu": 4.66 | |
| }, | |
| { | |
| "epoch": 0.5672371638141809, | |
| "grad_norm": 0.29636886715888977, | |
| "learning_rate": 0.00011338411316648532, | |
| "loss": 1.2758, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 522, | |
| "tokens_per_second_per_gpu": 17.46 | |
| }, | |
| { | |
| "epoch": 0.5683238250475414, | |
| "grad_norm": 0.27805787324905396, | |
| "learning_rate": 0.00011360174102285092, | |
| "loss": 1.2462, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 523, | |
| "tokens_per_second_per_gpu": 11.7 | |
| }, | |
| { | |
| "epoch": 0.569410486280902, | |
| "grad_norm": 0.26508042216300964, | |
| "learning_rate": 0.00011381936887921654, | |
| "loss": 1.2823, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 524, | |
| "tokens_per_second_per_gpu": 13.4 | |
| }, | |
| { | |
| "epoch": 0.5704971475142624, | |
| "grad_norm": 0.39035770297050476, | |
| "learning_rate": 0.00011403699673558216, | |
| "loss": 1.3125, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 525, | |
| "tokens_per_second_per_gpu": 22.0 | |
| }, | |
| { | |
| "epoch": 0.5715838087476229, | |
| "grad_norm": 0.2500219941139221, | |
| "learning_rate": 0.00011425462459194778, | |
| "loss": 1.2797, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 526, | |
| "tokens_per_second_per_gpu": 8.63 | |
| }, | |
| { | |
| "epoch": 0.5726704699809835, | |
| "grad_norm": 0.3673296570777893, | |
| "learning_rate": 0.00011447225244831339, | |
| "loss": 1.3597, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 527, | |
| "tokens_per_second_per_gpu": 10.53 | |
| }, | |
| { | |
| "epoch": 0.5737571312143439, | |
| "grad_norm": 0.3064488172531128, | |
| "learning_rate": 0.000114689880304679, | |
| "loss": 1.3437, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 528, | |
| "tokens_per_second_per_gpu": 5.12 | |
| }, | |
| { | |
| "epoch": 0.5748437924477044, | |
| "grad_norm": 0.3089617192745209, | |
| "learning_rate": 0.00011490750816104463, | |
| "loss": 1.4388, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 529, | |
| "tokens_per_second_per_gpu": 15.23 | |
| }, | |
| { | |
| "epoch": 0.5759304536810649, | |
| "grad_norm": 0.3777344524860382, | |
| "learning_rate": 0.00011512513601741025, | |
| "loss": 1.408, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 530, | |
| "tokens_per_second_per_gpu": 2.85 | |
| }, | |
| { | |
| "epoch": 0.5770171149144254, | |
| "grad_norm": 0.3356127440929413, | |
| "learning_rate": 0.00011534276387377585, | |
| "loss": 1.3556, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 531, | |
| "tokens_per_second_per_gpu": 6.44 | |
| }, | |
| { | |
| "epoch": 0.578103776147786, | |
| "grad_norm": 0.20577749609947205, | |
| "learning_rate": 0.00011556039173014146, | |
| "loss": 1.2447, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 532, | |
| "tokens_per_second_per_gpu": 4.68 | |
| }, | |
| { | |
| "epoch": 0.5791904373811464, | |
| "grad_norm": 0.3717140555381775, | |
| "learning_rate": 0.00011577801958650707, | |
| "loss": 1.3584, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 533, | |
| "tokens_per_second_per_gpu": 3.39 | |
| }, | |
| { | |
| "epoch": 0.5802770986145069, | |
| "grad_norm": 0.2820013165473938, | |
| "learning_rate": 0.0001159956474428727, | |
| "loss": 1.2907, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 534, | |
| "tokens_per_second_per_gpu": 6.41 | |
| }, | |
| { | |
| "epoch": 0.5813637598478675, | |
| "grad_norm": 0.31808245182037354, | |
| "learning_rate": 0.00011621327529923831, | |
| "loss": 1.2688, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 535, | |
| "tokens_per_second_per_gpu": 10.33 | |
| }, | |
| { | |
| "epoch": 0.5824504210812279, | |
| "grad_norm": 3.1854968070983887, | |
| "learning_rate": 0.00011643090315560391, | |
| "loss": 1.2972, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 536, | |
| "tokens_per_second_per_gpu": 7.82 | |
| }, | |
| { | |
| "epoch": 0.5835370823145885, | |
| "grad_norm": 0.21994483470916748, | |
| "learning_rate": 0.00011664853101196953, | |
| "loss": 1.28, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 537, | |
| "tokens_per_second_per_gpu": 11.99 | |
| }, | |
| { | |
| "epoch": 0.5846237435479489, | |
| "grad_norm": 0.3384319245815277, | |
| "learning_rate": 0.00011686615886833516, | |
| "loss": 1.4636, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 538, | |
| "tokens_per_second_per_gpu": 4.8 | |
| }, | |
| { | |
| "epoch": 0.5857104047813094, | |
| "grad_norm": 0.3604453504085541, | |
| "learning_rate": 0.00011708378672470077, | |
| "loss": 1.303, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 539, | |
| "tokens_per_second_per_gpu": 7.68 | |
| }, | |
| { | |
| "epoch": 0.58679706601467, | |
| "grad_norm": 0.39992472529411316, | |
| "learning_rate": 0.00011730141458106638, | |
| "loss": 1.2977, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 540, | |
| "tokens_per_second_per_gpu": 9.3 | |
| }, | |
| { | |
| "epoch": 0.5878837272480304, | |
| "grad_norm": 0.3333197236061096, | |
| "learning_rate": 0.000117519042437432, | |
| "loss": 1.4167, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 541, | |
| "tokens_per_second_per_gpu": 8.81 | |
| }, | |
| { | |
| "epoch": 0.5889703884813909, | |
| "grad_norm": 0.24491962790489197, | |
| "learning_rate": 0.0001177366702937976, | |
| "loss": 1.3139, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 542, | |
| "tokens_per_second_per_gpu": 15.81 | |
| }, | |
| { | |
| "epoch": 0.5900570497147515, | |
| "grad_norm": 0.3384905159473419, | |
| "learning_rate": 0.00011795429815016324, | |
| "loss": 1.3376, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 543, | |
| "tokens_per_second_per_gpu": 12.59 | |
| }, | |
| { | |
| "epoch": 0.5911437109481119, | |
| "grad_norm": 0.276775598526001, | |
| "learning_rate": 0.00011817192600652884, | |
| "loss": 1.3453, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 544, | |
| "tokens_per_second_per_gpu": 6.16 | |
| }, | |
| { | |
| "epoch": 0.5922303721814725, | |
| "grad_norm": 0.34516045451164246, | |
| "learning_rate": 0.00011838955386289445, | |
| "loss": 1.3117, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 545, | |
| "tokens_per_second_per_gpu": 11.67 | |
| }, | |
| { | |
| "epoch": 0.5933170334148329, | |
| "grad_norm": 0.2914760112762451, | |
| "learning_rate": 0.00011860718171926007, | |
| "loss": 1.3786, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 546, | |
| "tokens_per_second_per_gpu": 12.5 | |
| }, | |
| { | |
| "epoch": 0.5944036946481934, | |
| "grad_norm": 0.28285399079322815, | |
| "learning_rate": 0.0001188248095756257, | |
| "loss": 1.2903, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 547, | |
| "tokens_per_second_per_gpu": 10.17 | |
| }, | |
| { | |
| "epoch": 0.595490355881554, | |
| "grad_norm": 0.38129904866218567, | |
| "learning_rate": 0.00011904243743199131, | |
| "loss": 1.2505, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 548, | |
| "tokens_per_second_per_gpu": 7.48 | |
| }, | |
| { | |
| "epoch": 0.5965770171149144, | |
| "grad_norm": 0.2210085242986679, | |
| "learning_rate": 0.00011926006528835691, | |
| "loss": 1.3161, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 549, | |
| "tokens_per_second_per_gpu": 6.57 | |
| }, | |
| { | |
| "epoch": 0.5976636783482749, | |
| "grad_norm": 0.34959039092063904, | |
| "learning_rate": 0.00011947769314472252, | |
| "loss": 1.3799, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 550, | |
| "tokens_per_second_per_gpu": 8.65 | |
| }, | |
| { | |
| "epoch": 0.5987503395816354, | |
| "grad_norm": 0.28282710909843445, | |
| "learning_rate": 0.00011969532100108814, | |
| "loss": 1.3168, | |
| "memory/device_reserved (GiB)": 76.61, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 551, | |
| "tokens_per_second_per_gpu": 9.85 | |
| }, | |
| { | |
| "epoch": 0.5998370008149959, | |
| "grad_norm": 0.2631722390651703, | |
| "learning_rate": 0.00011991294885745376, | |
| "loss": 1.2816, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 552, | |
| "tokens_per_second_per_gpu": 10.57 | |
| }, | |
| { | |
| "epoch": 0.6009236620483565, | |
| "grad_norm": 0.41391441226005554, | |
| "learning_rate": 0.00012013057671381938, | |
| "loss": 1.1986, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 553, | |
| "tokens_per_second_per_gpu": 3.67 | |
| }, | |
| { | |
| "epoch": 0.6020103232817169, | |
| "grad_norm": 0.2636446952819824, | |
| "learning_rate": 0.00012034820457018499, | |
| "loss": 1.3496, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 554, | |
| "tokens_per_second_per_gpu": 14.49 | |
| }, | |
| { | |
| "epoch": 0.6030969845150774, | |
| "grad_norm": 0.3454253077507019, | |
| "learning_rate": 0.00012056583242655059, | |
| "loss": 1.2901, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 555, | |
| "tokens_per_second_per_gpu": 6.91 | |
| }, | |
| { | |
| "epoch": 0.604183645748438, | |
| "grad_norm": 0.42469850182533264, | |
| "learning_rate": 0.00012078346028291623, | |
| "loss": 1.3024, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 556, | |
| "tokens_per_second_per_gpu": 3.76 | |
| }, | |
| { | |
| "epoch": 0.6052703069817984, | |
| "grad_norm": 0.27329909801483154, | |
| "learning_rate": 0.00012100108813928183, | |
| "loss": 1.2256, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 557, | |
| "tokens_per_second_per_gpu": 16.26 | |
| }, | |
| { | |
| "epoch": 0.6063569682151589, | |
| "grad_norm": 0.2523745000362396, | |
| "learning_rate": 0.00012121871599564745, | |
| "loss": 1.2649, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 558, | |
| "tokens_per_second_per_gpu": 10.63 | |
| }, | |
| { | |
| "epoch": 0.6074436294485194, | |
| "grad_norm": 0.3387342095375061, | |
| "learning_rate": 0.00012143634385201306, | |
| "loss": 1.3065, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 559, | |
| "tokens_per_second_per_gpu": 10.08 | |
| }, | |
| { | |
| "epoch": 0.6085302906818799, | |
| "grad_norm": 0.2814262807369232, | |
| "learning_rate": 0.00012165397170837869, | |
| "loss": 1.3539, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 560, | |
| "tokens_per_second_per_gpu": 12.07 | |
| }, | |
| { | |
| "epoch": 0.6096169519152405, | |
| "grad_norm": 0.2710580825805664, | |
| "learning_rate": 0.0001218715995647443, | |
| "loss": 1.345, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 561, | |
| "tokens_per_second_per_gpu": 9.85 | |
| }, | |
| { | |
| "epoch": 0.6107036131486009, | |
| "grad_norm": 0.3519721031188965, | |
| "learning_rate": 0.00012208922742110991, | |
| "loss": 1.4264, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 562, | |
| "tokens_per_second_per_gpu": 4.54 | |
| }, | |
| { | |
| "epoch": 0.6117902743819614, | |
| "grad_norm": 0.28614380955696106, | |
| "learning_rate": 0.00012230685527747553, | |
| "loss": 1.3651, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 563, | |
| "tokens_per_second_per_gpu": 2.08 | |
| }, | |
| { | |
| "epoch": 0.612876935615322, | |
| "grad_norm": 0.22192278504371643, | |
| "learning_rate": 0.00012252448313384112, | |
| "loss": 1.3857, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 564, | |
| "tokens_per_second_per_gpu": 13.97 | |
| }, | |
| { | |
| "epoch": 0.6139635968486824, | |
| "grad_norm": 0.21522602438926697, | |
| "learning_rate": 0.00012274211099020676, | |
| "loss": 1.2802, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 565, | |
| "tokens_per_second_per_gpu": 11.51 | |
| }, | |
| { | |
| "epoch": 0.6150502580820429, | |
| "grad_norm": 0.27608051896095276, | |
| "learning_rate": 0.00012295973884657237, | |
| "loss": 1.3119, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 566, | |
| "tokens_per_second_per_gpu": 10.76 | |
| }, | |
| { | |
| "epoch": 0.6161369193154034, | |
| "grad_norm": 0.2759076654911041, | |
| "learning_rate": 0.00012317736670293798, | |
| "loss": 1.3184, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 567, | |
| "tokens_per_second_per_gpu": 13.98 | |
| }, | |
| { | |
| "epoch": 0.6172235805487639, | |
| "grad_norm": 0.3158048391342163, | |
| "learning_rate": 0.0001233949945593036, | |
| "loss": 1.449, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 568, | |
| "tokens_per_second_per_gpu": 9.12 | |
| }, | |
| { | |
| "epoch": 0.6183102417821245, | |
| "grad_norm": 0.26057320833206177, | |
| "learning_rate": 0.0001236126224156692, | |
| "loss": 1.3753, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 569, | |
| "tokens_per_second_per_gpu": 7.47 | |
| }, | |
| { | |
| "epoch": 0.6193969030154849, | |
| "grad_norm": 0.32286593317985535, | |
| "learning_rate": 0.00012383025027203483, | |
| "loss": 1.2814, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 570, | |
| "tokens_per_second_per_gpu": 7.2 | |
| }, | |
| { | |
| "epoch": 0.6204835642488454, | |
| "grad_norm": 0.32623955607414246, | |
| "learning_rate": 0.00012404787812840044, | |
| "loss": 1.2983, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 571, | |
| "tokens_per_second_per_gpu": 6.2 | |
| }, | |
| { | |
| "epoch": 0.621570225482206, | |
| "grad_norm": 0.3761420249938965, | |
| "learning_rate": 0.00012426550598476605, | |
| "loss": 1.4198, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 572, | |
| "tokens_per_second_per_gpu": 6.95 | |
| }, | |
| { | |
| "epoch": 0.6226568867155664, | |
| "grad_norm": 0.3026738464832306, | |
| "learning_rate": 0.00012448313384113167, | |
| "loss": 1.3204, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 573, | |
| "tokens_per_second_per_gpu": 9.2 | |
| }, | |
| { | |
| "epoch": 0.623743547948927, | |
| "grad_norm": 0.28398361802101135, | |
| "learning_rate": 0.00012470076169749728, | |
| "loss": 1.3443, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 574, | |
| "tokens_per_second_per_gpu": 8.06 | |
| }, | |
| { | |
| "epoch": 0.6248302091822874, | |
| "grad_norm": 0.3637777268886566, | |
| "learning_rate": 0.0001249183895538629, | |
| "loss": 1.3163, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 575, | |
| "tokens_per_second_per_gpu": 4.31 | |
| }, | |
| { | |
| "epoch": 0.6259168704156479, | |
| "grad_norm": 0.3870617747306824, | |
| "learning_rate": 0.0001251360174102285, | |
| "loss": 1.3907, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 576, | |
| "tokens_per_second_per_gpu": 7.83 | |
| }, | |
| { | |
| "epoch": 0.6270035316490085, | |
| "grad_norm": 0.2794150412082672, | |
| "learning_rate": 0.00012535364526659412, | |
| "loss": 1.2238, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 577, | |
| "tokens_per_second_per_gpu": 6.9 | |
| }, | |
| { | |
| "epoch": 0.6280901928823689, | |
| "grad_norm": 0.2682045102119446, | |
| "learning_rate": 0.00012557127312295976, | |
| "loss": 1.3197, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 578, | |
| "tokens_per_second_per_gpu": 9.91 | |
| }, | |
| { | |
| "epoch": 0.6291768541157294, | |
| "grad_norm": 0.2647673785686493, | |
| "learning_rate": 0.00012578890097932538, | |
| "loss": 1.3456, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 579, | |
| "tokens_per_second_per_gpu": 5.8 | |
| }, | |
| { | |
| "epoch": 0.63026351534909, | |
| "grad_norm": 0.3052732050418854, | |
| "learning_rate": 0.00012600652883569096, | |
| "loss": 1.2714, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 580, | |
| "tokens_per_second_per_gpu": 8.88 | |
| }, | |
| { | |
| "epoch": 0.6313501765824504, | |
| "grad_norm": 0.2540159523487091, | |
| "learning_rate": 0.00012622415669205658, | |
| "loss": 1.286, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 581, | |
| "tokens_per_second_per_gpu": 7.9 | |
| }, | |
| { | |
| "epoch": 0.632436837815811, | |
| "grad_norm": 0.24028179049491882, | |
| "learning_rate": 0.0001264417845484222, | |
| "loss": 1.2807, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 582, | |
| "tokens_per_second_per_gpu": 4.57 | |
| }, | |
| { | |
| "epoch": 0.6335234990491714, | |
| "grad_norm": 0.9006481766700745, | |
| "learning_rate": 0.00012665941240478783, | |
| "loss": 1.3624, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 583, | |
| "tokens_per_second_per_gpu": 8.1 | |
| }, | |
| { | |
| "epoch": 0.6346101602825319, | |
| "grad_norm": 0.29973724484443665, | |
| "learning_rate": 0.00012687704026115345, | |
| "loss": 1.307, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 584, | |
| "tokens_per_second_per_gpu": 10.1 | |
| }, | |
| { | |
| "epoch": 0.6356968215158925, | |
| "grad_norm": 0.25734904408454895, | |
| "learning_rate": 0.00012709466811751903, | |
| "loss": 1.36, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 585, | |
| "tokens_per_second_per_gpu": 12.14 | |
| }, | |
| { | |
| "epoch": 0.6367834827492529, | |
| "grad_norm": 0.29763537645339966, | |
| "learning_rate": 0.00012731229597388465, | |
| "loss": 1.3436, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 586, | |
| "tokens_per_second_per_gpu": 7.49 | |
| }, | |
| { | |
| "epoch": 0.6378701439826134, | |
| "grad_norm": 0.276018887758255, | |
| "learning_rate": 0.0001275299238302503, | |
| "loss": 1.2951, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 587, | |
| "tokens_per_second_per_gpu": 17.03 | |
| }, | |
| { | |
| "epoch": 0.6389568052159739, | |
| "grad_norm": 0.23480059206485748, | |
| "learning_rate": 0.0001277475516866159, | |
| "loss": 1.3269, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 588, | |
| "tokens_per_second_per_gpu": 11.51 | |
| }, | |
| { | |
| "epoch": 0.6400434664493344, | |
| "grad_norm": 0.27914443612098694, | |
| "learning_rate": 0.00012796517954298151, | |
| "loss": 1.2843, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 589, | |
| "tokens_per_second_per_gpu": 5.45 | |
| }, | |
| { | |
| "epoch": 0.641130127682695, | |
| "grad_norm": 0.2673723101615906, | |
| "learning_rate": 0.00012818280739934713, | |
| "loss": 1.2061, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 590, | |
| "tokens_per_second_per_gpu": 6.9 | |
| }, | |
| { | |
| "epoch": 0.6422167889160554, | |
| "grad_norm": 0.2757096588611603, | |
| "learning_rate": 0.00012840043525571274, | |
| "loss": 1.2123, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.55, | |
| "memory/max_allocated (GiB)": 72.55, | |
| "step": 591, | |
| "tokens_per_second_per_gpu": 13.45 | |
| }, | |
| { | |
| "epoch": 0.6433034501494159, | |
| "grad_norm": 0.29265737533569336, | |
| "learning_rate": 0.00012861806311207836, | |
| "loss": 1.3674, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 592, | |
| "tokens_per_second_per_gpu": 6.57 | |
| }, | |
| { | |
| "epoch": 0.6443901113827765, | |
| "grad_norm": 0.2785845100879669, | |
| "learning_rate": 0.00012883569096844397, | |
| "loss": 1.3454, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 593, | |
| "tokens_per_second_per_gpu": 8.4 | |
| }, | |
| { | |
| "epoch": 0.6454767726161369, | |
| "grad_norm": 0.3149111270904541, | |
| "learning_rate": 0.00012905331882480958, | |
| "loss": 1.287, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 594, | |
| "tokens_per_second_per_gpu": 3.05 | |
| }, | |
| { | |
| "epoch": 0.6465634338494974, | |
| "grad_norm": 0.314368337392807, | |
| "learning_rate": 0.0001292709466811752, | |
| "loss": 1.2292, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 595, | |
| "tokens_per_second_per_gpu": 5.75 | |
| }, | |
| { | |
| "epoch": 0.6476500950828579, | |
| "grad_norm": 0.32672354578971863, | |
| "learning_rate": 0.0001294885745375408, | |
| "loss": 1.2648, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 596, | |
| "tokens_per_second_per_gpu": 17.44 | |
| }, | |
| { | |
| "epoch": 0.6487367563162184, | |
| "grad_norm": 0.35420507192611694, | |
| "learning_rate": 0.00012970620239390642, | |
| "loss": 1.4073, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 597, | |
| "tokens_per_second_per_gpu": 5.49 | |
| }, | |
| { | |
| "epoch": 0.649823417549579, | |
| "grad_norm": 0.3116598427295685, | |
| "learning_rate": 0.00012992383025027204, | |
| "loss": 1.3453, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 598, | |
| "tokens_per_second_per_gpu": 9.45 | |
| }, | |
| { | |
| "epoch": 0.6509100787829394, | |
| "grad_norm": 0.2902546226978302, | |
| "learning_rate": 0.00013014145810663765, | |
| "loss": 1.268, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 599, | |
| "tokens_per_second_per_gpu": 2.65 | |
| }, | |
| { | |
| "epoch": 0.6519967400162999, | |
| "grad_norm": 0.2858057916164398, | |
| "learning_rate": 0.0001303590859630033, | |
| "loss": 1.2732, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 600, | |
| "tokens_per_second_per_gpu": 8.34 | |
| }, | |
| { | |
| "epoch": 0.6530834012496605, | |
| "grad_norm": 0.23800179362297058, | |
| "learning_rate": 0.00013057671381936888, | |
| "loss": 1.2665, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 601, | |
| "tokens_per_second_per_gpu": 8.06 | |
| }, | |
| { | |
| "epoch": 0.6541700624830209, | |
| "grad_norm": 0.284396767616272, | |
| "learning_rate": 0.0001307943416757345, | |
| "loss": 1.2888, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 602, | |
| "tokens_per_second_per_gpu": 7.82 | |
| }, | |
| { | |
| "epoch": 0.6552567237163814, | |
| "grad_norm": 0.24885518848896027, | |
| "learning_rate": 0.0001310119695321001, | |
| "loss": 1.3419, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 603, | |
| "tokens_per_second_per_gpu": 7.13 | |
| }, | |
| { | |
| "epoch": 0.6563433849497419, | |
| "grad_norm": 0.22954203188419342, | |
| "learning_rate": 0.00013122959738846572, | |
| "loss": 1.2946, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 604, | |
| "tokens_per_second_per_gpu": 14.63 | |
| }, | |
| { | |
| "epoch": 0.6574300461831024, | |
| "grad_norm": 0.2986409664154053, | |
| "learning_rate": 0.00013144722524483136, | |
| "loss": 1.3953, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 605, | |
| "tokens_per_second_per_gpu": 8.73 | |
| }, | |
| { | |
| "epoch": 0.658516707416463, | |
| "grad_norm": 0.22522079944610596, | |
| "learning_rate": 0.00013166485310119695, | |
| "loss": 1.3943, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 606, | |
| "tokens_per_second_per_gpu": 8.38 | |
| }, | |
| { | |
| "epoch": 0.6596033686498234, | |
| "grad_norm": 0.266604483127594, | |
| "learning_rate": 0.00013188248095756256, | |
| "loss": 1.2563, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 607, | |
| "tokens_per_second_per_gpu": 6.52 | |
| }, | |
| { | |
| "epoch": 0.6606900298831839, | |
| "grad_norm": 0.2392929047346115, | |
| "learning_rate": 0.00013210010881392818, | |
| "loss": 1.2555, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 608, | |
| "tokens_per_second_per_gpu": 9.9 | |
| }, | |
| { | |
| "epoch": 0.6617766911165445, | |
| "grad_norm": 0.46127644181251526, | |
| "learning_rate": 0.00013231773667029382, | |
| "loss": 1.3176, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 609, | |
| "tokens_per_second_per_gpu": 4.67 | |
| }, | |
| { | |
| "epoch": 0.6628633523499049, | |
| "grad_norm": 0.27595800161361694, | |
| "learning_rate": 0.00013253536452665943, | |
| "loss": 1.2993, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 610, | |
| "tokens_per_second_per_gpu": 7.58 | |
| }, | |
| { | |
| "epoch": 0.6639500135832654, | |
| "grad_norm": 0.3820529282093048, | |
| "learning_rate": 0.00013275299238302505, | |
| "loss": 1.3346, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 611, | |
| "tokens_per_second_per_gpu": 7.23 | |
| }, | |
| { | |
| "epoch": 0.6650366748166259, | |
| "grad_norm": 0.2833244204521179, | |
| "learning_rate": 0.00013297062023939063, | |
| "loss": 1.2527, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.53, | |
| "memory/max_allocated (GiB)": 72.53, | |
| "step": 612, | |
| "tokens_per_second_per_gpu": 5.64 | |
| }, | |
| { | |
| "epoch": 0.6661233360499864, | |
| "grad_norm": 0.2779436409473419, | |
| "learning_rate": 0.00013318824809575627, | |
| "loss": 1.3506, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.54, | |
| "memory/max_allocated (GiB)": 72.54, | |
| "step": 613, | |
| "tokens_per_second_per_gpu": 10.56 | |
| }, | |
| { | |
| "epoch": 0.667209997283347, | |
| "grad_norm": 0.24191920459270477, | |
| "learning_rate": 0.0001334058759521219, | |
| "loss": 1.3784, | |
| "memory/device_reserved (GiB)": 76.62, | |
| "memory/max_active (GiB)": 72.52, | |
| "memory/max_allocated (GiB)": 72.52, | |
| "step": 614, | |
| "tokens_per_second_per_gpu": 9.93 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 18380, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 307, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.295802980605205e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |