| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.035764705882354, | |
| "eval_steps": 500, | |
| "global_step": 4000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001176470588235294, | |
| "grad_norm": 1.828125, | |
| "learning_rate": 1.0588235294117648e-06, | |
| "loss": 2.0858, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.002352941176470588, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 2.2352941176470592e-06, | |
| "loss": 2.1526, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0035294117647058825, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 3.4117647058823532e-06, | |
| "loss": 2.0827, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.004705882352941176, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 4.588235294117648e-06, | |
| "loss": 2.0781, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0058823529411764705, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 5.764705882352942e-06, | |
| "loss": 2.0782, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.007058823529411765, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 6.941176470588236e-06, | |
| "loss": 2.0394, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.00823529411764706, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 8.11764705882353e-06, | |
| "loss": 2.0089, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.009411764705882352, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 9.294117647058824e-06, | |
| "loss": 2.0286, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.010588235294117647, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 1.0470588235294118e-05, | |
| "loss": 2.0909, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.011764705882352941, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.1647058823529412e-05, | |
| "loss": 2.1799, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.012941176470588235, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.2823529411764706e-05, | |
| "loss": 2.2038, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.01411764705882353, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 2.178, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.015294117647058824, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.5176470588235295e-05, | |
| "loss": 2.1644, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.01647058823529412, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.635294117647059e-05, | |
| "loss": 2.3884, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.01764705882352941, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 1.7529411764705884e-05, | |
| "loss": 2.3895, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.018823529411764704, | |
| "grad_norm": 0.875, | |
| "learning_rate": 1.8705882352941178e-05, | |
| "loss": 2.3945, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 1.988235294117647e-05, | |
| "loss": 2.3828, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.021176470588235293, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 2.1058823529411766e-05, | |
| "loss": 2.4047, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.02235294117647059, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 2.223529411764706e-05, | |
| "loss": 2.4634, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.023529411764705882, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 2.3411764705882354e-05, | |
| "loss": 2.4707, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.024705882352941175, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.4588235294117648e-05, | |
| "loss": 2.4523, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.02588235294117647, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.576470588235294e-05, | |
| "loss": 2.447, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.027058823529411764, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.6941176470588236e-05, | |
| "loss": 2.4069, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.02823529411764706, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 2.8117647058823533e-05, | |
| "loss": 2.4041, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.029411764705882353, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 2.9294117647058827e-05, | |
| "loss": 2.3953, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.03058823529411765, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 3.0470588235294118e-05, | |
| "loss": 2.359, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.03176470588235294, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 3.164705882352941e-05, | |
| "loss": 2.391, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.03294117647058824, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 3.2823529411764706e-05, | |
| "loss": 2.4101, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.03411764705882353, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 2.3952, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.03529411764705882, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 3.5176470588235294e-05, | |
| "loss": 2.3978, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.036470588235294116, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 3.635294117647059e-05, | |
| "loss": 2.3642, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.03764705882352941, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.752941176470588e-05, | |
| "loss": 2.365, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.03882352941176471, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 3.870588235294118e-05, | |
| "loss": 2.3587, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 3.988235294117647e-05, | |
| "loss": 2.2768, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.041176470588235294, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.1058823529411764e-05, | |
| "loss": 2.2424, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.042352941176470586, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.2235294117647065e-05, | |
| "loss": 2.2373, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.04352941176470588, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.341176470588236e-05, | |
| "loss": 2.2041, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.04470588235294118, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 4.4588235294117646e-05, | |
| "loss": 2.0364, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.04588235294117647, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 4.576470588235294e-05, | |
| "loss": 2.022, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.047058823529411764, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.694117647058824e-05, | |
| "loss": 2.0304, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.04823529411764706, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.8117647058823535e-05, | |
| "loss": 1.8888, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.04941176470588235, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.929411764705882e-05, | |
| "loss": 1.8336, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.05058823529411765, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.999996972775767e-05, | |
| "loss": 1.8488, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.05176470588235294, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.9999629165873304e-05, | |
| "loss": 1.8564, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.052941176470588235, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.999891020697362e-05, | |
| "loss": 1.851, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.05411764705882353, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.999781286194085e-05, | |
| "loss": 1.8762, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0009411764705882, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.999633714738453e-05, | |
| "loss": 2.204, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0021176470588236, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.999448308564122e-05, | |
| "loss": 2.0315, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.003294117647059, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.999225070477421e-05, | |
| "loss": 1.9977, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0044705882352942, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.9989640038573036e-05, | |
| "loss": 1.98, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0056470588235293, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.998665112655302e-05, | |
| "loss": 1.9824, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0068235294117647, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.9983284013954654e-05, | |
| "loss": 1.9695, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.008, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 4.99795387517429e-05, | |
| "loss": 1.9689, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.0091764705882353, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.997541539660645e-05, | |
| "loss": 1.9813, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.0103529411764707, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 4.997091401095682e-05, | |
| "loss": 1.9505, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.0115294117647058, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.99660346629275e-05, | |
| "loss": 2.1193, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.0127058823529411, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.9960777426372765e-05, | |
| "loss": 2.1822, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.0138823529411765, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 4.9955142380866725e-05, | |
| "loss": 2.1511, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.0150588235294118, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 4.9949129611702026e-05, | |
| "loss": 2.1491, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.0162352941176471, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 4.994273920988856e-05, | |
| "loss": 2.2836, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0174117647058825, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.9935971272152125e-05, | |
| "loss": 2.3416, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.0185882352941176, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 4.992882590093295e-05, | |
| "loss": 2.3509, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.019764705882353, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.9921303204384104e-05, | |
| "loss": 2.3074, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.0209411764705882, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.9913403296369934e-05, | |
| "loss": 2.3293, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.0221176470588236, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.9905126296464264e-05, | |
| "loss": 2.3537, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.023294117647059, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.989647232994864e-05, | |
| "loss": 2.3819, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.024470588235294, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.9887441527810405e-05, | |
| "loss": 2.3349, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.0256470588235294, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.987803402674074e-05, | |
| "loss": 2.362, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.0268235294117647, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.9868249969132556e-05, | |
| "loss": 2.307, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.028, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.9858089503078386e-05, | |
| "loss": 2.2537, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0291764705882354, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.984755278236811e-05, | |
| "loss": 2.265, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0303529411764707, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.983663996648664e-05, | |
| "loss": 2.2594, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0315294117647058, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.9825351220611505e-05, | |
| "loss": 2.259, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.0327058823529411, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.981368671561035e-05, | |
| "loss": 2.2308, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0338823529411765, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.980164662803836e-05, | |
| "loss": 2.2026, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.0350588235294118, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.978923114013554e-05, | |
| "loss": 2.2453, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.0362352941176471, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.977644043982404e-05, | |
| "loss": 2.2228, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.0374117647058823, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.9763274720705255e-05, | |
| "loss": 2.1585, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.0385882352941176, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.974973418205686e-05, | |
| "loss": 2.1675, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.039764705882353, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.97358190288299e-05, | |
| "loss": 2.1195, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0409411764705883, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.972152947164559e-05, | |
| "loss": 2.0259, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.0421176470588236, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.970686572679216e-05, | |
| "loss": 1.9431, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.043294117647059, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.96918280162216e-05, | |
| "loss": 1.9651, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.044470588235294, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.967641656754629e-05, | |
| "loss": 1.8204, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.0456470588235294, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.966063161403552e-05, | |
| "loss": 1.751, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0468235294117647, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.964447339461201e-05, | |
| "loss": 1.7183, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.048, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 4.962794215384827e-05, | |
| "loss": 1.6285, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0491764705882354, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.9611038141962905e-05, | |
| "loss": 1.5325, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0503529411764705, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.9593761614816804e-05, | |
| "loss": 1.5636, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.0515294117647058, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 4.9576112833909304e-05, | |
| "loss": 1.5401, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.0527058823529412, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.955809206637422e-05, | |
| "loss": 1.5422, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.0538823529411765, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.953969958497576e-05, | |
| "loss": 1.5396, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.0007058823529413, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 4.952093566810447e-05, | |
| "loss": 1.8166, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.0018823529411764, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 4.9501800599772965e-05, | |
| "loss": 1.7246, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0030588235294116, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.948229466961166e-05, | |
| "loss": 1.684, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.004235294117647, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.946241817286435e-05, | |
| "loss": 1.7107, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.0054117647058822, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.944217141038379e-05, | |
| "loss": 1.6617, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.006588235294118, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.94215546886271e-05, | |
| "loss": 1.643, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.007764705882353, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 4.9400568319651154e-05, | |
| "loss": 1.6622, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.0089411764705885, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.937921262110784e-05, | |
| "loss": 1.6658, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0101176470588236, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 4.935748791623924e-05, | |
| "loss": 1.6528, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.0112941176470587, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.933539453387279e-05, | |
| "loss": 1.8627, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.0124705882352942, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 4.9312932808416235e-05, | |
| "loss": 1.8975, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.0136470588235293, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 4.929010307985262e-05, | |
| "loss": 1.8523, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.014823529411765, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.926690569373512e-05, | |
| "loss": 1.8695, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.016, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.924334100118181e-05, | |
| "loss": 1.9365, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.017176470588235, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.921940935887034e-05, | |
| "loss": 2.0436, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.0183529411764707, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.919511112903258e-05, | |
| "loss": 2.0563, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.019529411764706, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 4.9170446679449076e-05, | |
| "loss": 2.0594, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.0207058823529414, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.9145416383443544e-05, | |
| "loss": 2.048, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.0218823529411765, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 4.9120020619877175e-05, | |
| "loss": 2.0867, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.0230588235294116, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.9094259773142905e-05, | |
| "loss": 2.0633, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.024235294117647, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 4.906813423315963e-05, | |
| "loss": 2.0737, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.0254117647058822, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 4.904164439536626e-05, | |
| "loss": 2.0827, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.026588235294118, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.901479066071577e-05, | |
| "loss": 2.0431, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.027764705882353, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.898757343566912e-05, | |
| "loss": 1.9364, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.028941176470588, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 4.895999313218907e-05, | |
| "loss": 1.9666, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.0301176470588236, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 4.893205016773401e-05, | |
| "loss": 1.9512, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.0312941176470587, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 4.890374496525157e-05, | |
| "loss": 1.9265, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.0324705882352943, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 4.88750779531723e-05, | |
| "loss": 1.9203, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.0336470588235294, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.884604956540308e-05, | |
| "loss": 1.9331, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.034823529411765, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 4.881666024132065e-05, | |
| "loss": 1.9, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.036, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.878691042576491e-05, | |
| "loss": 1.8835, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.037176470588235, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 4.875680056903219e-05, | |
| "loss": 1.8417, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.0383529411764707, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 4.872633112686846e-05, | |
| "loss": 1.7935, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.039529411764706, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.869550256046238e-05, | |
| "loss": 1.8484, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.0407058823529414, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 4.866431533643839e-05, | |
| "loss": 1.6442, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.0418823529411765, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.863276992684959e-05, | |
| "loss": 1.6369, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.0430588235294116, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.8600866809170606e-05, | |
| "loss": 1.6172, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.044235294117647, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.856860646629039e-05, | |
| "loss": 1.5185, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.0454117647058823, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 4.853598938650487e-05, | |
| "loss": 1.4239, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.046588235294118, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 4.850301606350961e-05, | |
| "loss": 1.4144, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.047764705882353, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 4.8469686996392296e-05, | |
| "loss": 1.3227, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.048941176470588, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 4.843600268962517e-05, | |
| "loss": 1.2229, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.0501176470588236, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 4.840196365305747e-05, | |
| "loss": 1.2018, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.0512941176470587, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 4.8367570401907605e-05, | |
| "loss": 1.2035, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.0524705882352943, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 4.833282345675546e-05, | |
| "loss": 1.1799, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.0536470588235294, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 4.829772334353443e-05, | |
| "loss": 1.2021, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.0004705882352942, | |
| "grad_norm": 2.203125, | |
| "learning_rate": 4.826227059352354e-05, | |
| "loss": 1.402, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.0016470588235293, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 4.822646574333933e-05, | |
| "loss": 1.3918, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.002823529411765, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 4.8190309334927774e-05, | |
| "loss": 1.329, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.004, | |
| "grad_norm": 1.375, | |
| "learning_rate": 4.815380191555608e-05, | |
| "loss": 1.3318, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.005176470588235, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.811694403780438e-05, | |
| "loss": 1.284, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.0063529411764707, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.807973625955739e-05, | |
| "loss": 1.2961, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.007529411764706, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 4.804217914399595e-05, | |
| "loss": 1.297, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.0087058823529413, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.8004273259588503e-05, | |
| "loss": 1.2985, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.0098823529411765, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.7966019180082525e-05, | |
| "loss": 1.2931, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.0110588235294116, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 4.792741748449575e-05, | |
| "loss": 1.4302, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.012235294117647, | |
| "grad_norm": 1.375, | |
| "learning_rate": 4.788846875710753e-05, | |
| "loss": 1.4525, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.0134117647058822, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 4.784917358744988e-05, | |
| "loss": 1.4825, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.014588235294118, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 4.7809532570298635e-05, | |
| "loss": 1.4848, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.015764705882353, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 4.776954630566438e-05, | |
| "loss": 1.5629, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.016941176470588, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 4.7729215398783435e-05, | |
| "loss": 1.6696, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.0181176470588236, | |
| "grad_norm": 1.375, | |
| "learning_rate": 4.7688540460108634e-05, | |
| "loss": 1.6701, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.0192941176470587, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 4.7647522105300135e-05, | |
| "loss": 1.6645, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.0204705882352942, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 4.7606160955216065e-05, | |
| "loss": 1.6469, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.0216470588235294, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 4.7564457635903145e-05, | |
| "loss": 1.6738, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.022823529411765, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 4.7522412778587187e-05, | |
| "loss": 1.7208, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.024, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 4.7480027019663585e-05, | |
| "loss": 1.6901, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.025176470588235, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 4.7437301000687645e-05, | |
| "loss": 1.6812, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.0263529411764707, | |
| "grad_norm": 1.5234375, | |
| "learning_rate": 4.739423536836487e-05, | |
| "loss": 1.691, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.027529411764706, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 4.73508307745412e-05, | |
| "loss": 1.5832, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.0287058823529414, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 4.730708787619313e-05, | |
| "loss": 1.5916, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.0298823529411765, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 4.726300733541779e-05, | |
| "loss": 1.5883, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.0310588235294116, | |
| "grad_norm": 1.4375, | |
| "learning_rate": 4.721858981942284e-05, | |
| "loss": 1.5932, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.032235294117647, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 4.717383600051651e-05, | |
| "loss": 1.5635, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.0334117647058823, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 4.712874655609728e-05, | |
| "loss": 1.5661, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.034588235294118, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 4.708332216864374e-05, | |
| "loss": 1.5429, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.035764705882353, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 4.703756352570418e-05, | |
| "loss": 1.5332, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.036941176470588, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 4.699147131988624e-05, | |
| "loss": 1.466, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.0381176470588236, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 4.694504624884639e-05, | |
| "loss": 1.4506, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.0392941176470587, | |
| "grad_norm": 1.5, | |
| "learning_rate": 4.6898289015279385e-05, | |
| "loss": 1.4586, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.0404705882352943, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 4.6851200326907626e-05, | |
| "loss": 1.3431, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.0416470588235294, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 4.680378089647045e-05, | |
| "loss": 1.2979, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.0428235294117645, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 4.675603144171335e-05, | |
| "loss": 1.2621, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.044, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 4.670795268537709e-05, | |
| "loss": 1.1714, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.045176470588235, | |
| "grad_norm": 1.5, | |
| "learning_rate": 4.665954535518678e-05, | |
| "loss": 1.0755, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.0463529411764707, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 4.6610810183840856e-05, | |
| "loss": 1.0567, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.047529411764706, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 4.6561747908999994e-05, | |
| "loss": 1.0096, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.0487058823529414, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 4.651235927327594e-05, | |
| "loss": 0.9038, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.0498823529411765, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 4.646264502422029e-05, | |
| "loss": 0.8959, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.0510588235294116, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 4.6412605914313144e-05, | |
| "loss": 0.8875, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.052235294117647, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 4.636224270095171e-05, | |
| "loss": 0.8674, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.0534117647058823, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 4.63115561464389e-05, | |
| "loss": 0.8659, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 4.000235294117647, | |
| "grad_norm": 2.46875, | |
| "learning_rate": 4.626054701797173e-05, | |
| "loss": 1.0182, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 4.001411764705883, | |
| "grad_norm": 1.671875, | |
| "learning_rate": 4.620921608762973e-05, | |
| "loss": 1.1009, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 4.002588235294118, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 4.615756413236325e-05, | |
| "loss": 1.0102, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 4.003764705882353, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 4.6105591933981716e-05, | |
| "loss": 0.9855, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 4.004941176470588, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 4.6053300279141797e-05, | |
| "loss": 0.9633, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 4.006117647058823, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 4.6000689959335474e-05, | |
| "loss": 0.9708, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.007294117647059, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 4.594776177087807e-05, | |
| "loss": 0.9582, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 4.008470588235294, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 4.589451651489623e-05, | |
| "loss": 0.94, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.009647058823529, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 4.584095499731572e-05, | |
| "loss": 0.9422, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 4.0108235294117645, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 4.578707802884932e-05, | |
| "loss": 1.0069, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 4.012, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 4.5732886424984475e-05, | |
| "loss": 1.115, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 4.013176470588236, | |
| "grad_norm": 1.5625, | |
| "learning_rate": 4.5678381005971014e-05, | |
| "loss": 1.1126, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 4.014352941176471, | |
| "grad_norm": 1.5234375, | |
| "learning_rate": 4.5623562596808685e-05, | |
| "loss": 1.094, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 4.015529411764706, | |
| "grad_norm": 1.6328125, | |
| "learning_rate": 4.556843202723469e-05, | |
| "loss": 1.0964, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 4.016705882352941, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 4.551299013171111e-05, | |
| "loss": 1.2786, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 4.017882352941177, | |
| "grad_norm": 1.6484375, | |
| "learning_rate": 4.545723774941234e-05, | |
| "loss": 1.2753, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.019058823529412, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 4.540117572421226e-05, | |
| "loss": 1.2446, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 4.020235294117647, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 4.534480490467161e-05, | |
| "loss": 1.2742, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 4.021411764705882, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 4.528812614402503e-05, | |
| "loss": 1.284, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 4.022588235294117, | |
| "grad_norm": 1.671875, | |
| "learning_rate": 4.523114030016819e-05, | |
| "loss": 1.2935, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 4.023764705882353, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 4.517384823564483e-05, | |
| "loss": 1.2918, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 4.0249411764705885, | |
| "grad_norm": 1.5859375, | |
| "learning_rate": 4.511625081763366e-05, | |
| "loss": 1.3194, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 4.026117647058824, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 4.505834891793523e-05, | |
| "loss": 1.3148, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 4.027294117647059, | |
| "grad_norm": 1.6484375, | |
| "learning_rate": 4.5000143412958805e-05, | |
| "loss": 1.2555, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 4.028470588235294, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 4.494163518370902e-05, | |
| "loss": 1.2144, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 4.02964705882353, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 4.4882825115772584e-05, | |
| "loss": 1.2251, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.030823529411765, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 4.482371409930488e-05, | |
| "loss": 1.193, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 4.032, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 4.476430302901645e-05, | |
| "loss": 1.1761, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 4.033176470588235, | |
| "grad_norm": 1.71875, | |
| "learning_rate": 4.470459280415951e-05, | |
| "loss": 1.1914, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 4.03435294117647, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 4.46445843285143e-05, | |
| "loss": 1.178, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 4.035529411764706, | |
| "grad_norm": 1.703125, | |
| "learning_rate": 4.458427851037541e-05, | |
| "loss": 1.1646, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 4.036705882352941, | |
| "grad_norm": 1.6484375, | |
| "learning_rate": 4.452367626253805e-05, | |
| "loss": 1.1211, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 4.0378823529411765, | |
| "grad_norm": 1.6640625, | |
| "learning_rate": 4.446277850228421e-05, | |
| "loss": 1.0985, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 4.039058823529412, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 4.44015861513688e-05, | |
| "loss": 1.0926, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 4.040235294117647, | |
| "grad_norm": 1.7109375, | |
| "learning_rate": 4.434010013600567e-05, | |
| "loss": 0.9911, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 4.041411764705883, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 4.4278321386853605e-05, | |
| "loss": 0.9251, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.042588235294118, | |
| "grad_norm": 1.6875, | |
| "learning_rate": 4.421625083900226e-05, | |
| "loss": 0.9098, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 4.043764705882353, | |
| "grad_norm": 1.7265625, | |
| "learning_rate": 4.415388943195797e-05, | |
| "loss": 0.9025, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 4.044941176470588, | |
| "grad_norm": 1.5859375, | |
| "learning_rate": 4.409123810962955e-05, | |
| "loss": 0.7625, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 4.046117647058823, | |
| "grad_norm": 1.59375, | |
| "learning_rate": 4.402829782031399e-05, | |
| "loss": 0.7713, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 4.047294117647059, | |
| "grad_norm": 1.5546875, | |
| "learning_rate": 4.396506951668214e-05, | |
| "loss": 0.725, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 4.048470588235294, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 4.3901554155764244e-05, | |
| "loss": 0.6342, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 4.049647058823529, | |
| "grad_norm": 1.515625, | |
| "learning_rate": 4.383775269893549e-05, | |
| "loss": 0.6247, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 4.0508235294117645, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 4.377366611190141e-05, | |
| "loss": 0.595, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 4.052, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 4.3709295364683366e-05, | |
| "loss": 0.5783, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 4.053176470588236, | |
| "grad_norm": 1.5, | |
| "learning_rate": 4.364464143160372e-05, | |
| "loss": 0.5841, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.054352941176471, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 4.357970529127121e-05, | |
| "loss": 0.5763, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 5.001176470588235, | |
| "grad_norm": 2.15625, | |
| "learning_rate": 4.351448792656609e-05, | |
| "loss": 0.8959, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 5.00235294117647, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 4.3448990324625244e-05, | |
| "loss": 0.7412, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 5.003529411764706, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 4.338321347682726e-05, | |
| "loss": 0.6903, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 5.004705882352941, | |
| "grad_norm": 1.625, | |
| "learning_rate": 4.3317158378777424e-05, | |
| "loss": 0.6742, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 5.0058823529411764, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 4.325082603029264e-05, | |
| "loss": 0.6565, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 5.007058823529412, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 4.318421743538632e-05, | |
| "loss": 0.6434, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 5.008235294117647, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 4.311733360225314e-05, | |
| "loss": 0.6502, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 5.009411764705883, | |
| "grad_norm": 1.5625, | |
| "learning_rate": 4.3050175543253845e-05, | |
| "loss": 0.6587, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 5.010588235294118, | |
| "grad_norm": 1.796875, | |
| "learning_rate": 4.2982744274899865e-05, | |
| "loss": 0.6593, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.011764705882353, | |
| "grad_norm": 1.625, | |
| "learning_rate": 4.291504081783798e-05, | |
| "loss": 0.7849, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 5.012941176470588, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 4.284706619683483e-05, | |
| "loss": 0.7649, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 5.014117647058823, | |
| "grad_norm": 1.7109375, | |
| "learning_rate": 4.2778821440761416e-05, | |
| "loss": 0.748, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 5.015294117647059, | |
| "grad_norm": 1.671875, | |
| "learning_rate": 4.271030758257756e-05, | |
| "loss": 0.7535, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 5.016470588235294, | |
| "grad_norm": 1.8828125, | |
| "learning_rate": 4.264152565931619e-05, | |
| "loss": 0.905, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 5.017647058823529, | |
| "grad_norm": 1.828125, | |
| "learning_rate": 4.257247671206777e-05, | |
| "loss": 0.9189, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 5.0188235294117645, | |
| "grad_norm": 1.796875, | |
| "learning_rate": 4.2503161785964396e-05, | |
| "loss": 0.8889, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "grad_norm": 1.8125, | |
| "learning_rate": 4.2433581930164075e-05, | |
| "loss": 0.8973, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 5.021176470588236, | |
| "grad_norm": 1.84375, | |
| "learning_rate": 4.236373819783484e-05, | |
| "loss": 0.9167, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 5.022352941176471, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 4.229363164613874e-05, | |
| "loss": 0.9335, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.023529411764706, | |
| "grad_norm": 1.828125, | |
| "learning_rate": 4.222326333621592e-05, | |
| "loss": 0.9327, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 5.024705882352941, | |
| "grad_norm": 1.8203125, | |
| "learning_rate": 4.215263433316854e-05, | |
| "loss": 0.9492, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 5.025882352941177, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 4.2081745706044595e-05, | |
| "loss": 0.9445, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 5.027058823529412, | |
| "grad_norm": 1.8125, | |
| "learning_rate": 4.20105985278218e-05, | |
| "loss": 0.8808, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 5.028235294117647, | |
| "grad_norm": 1.8046875, | |
| "learning_rate": 4.193919387539132e-05, | |
| "loss": 0.8726, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 5.029411764705882, | |
| "grad_norm": 1.765625, | |
| "learning_rate": 4.1867532829541486e-05, | |
| "loss": 0.8666, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 5.030588235294117, | |
| "grad_norm": 1.765625, | |
| "learning_rate": 4.179561647494144e-05, | |
| "loss": 0.868, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 5.031764705882353, | |
| "grad_norm": 1.8203125, | |
| "learning_rate": 4.1723445900124645e-05, | |
| "loss": 0.8456, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 5.0329411764705885, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 4.165102219747254e-05, | |
| "loss": 0.8436, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 5.034117647058824, | |
| "grad_norm": 1.75, | |
| "learning_rate": 4.15783464631979e-05, | |
| "loss": 0.8364, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.035294117647059, | |
| "grad_norm": 1.796875, | |
| "learning_rate": 4.150541979732828e-05, | |
| "loss": 0.8309, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 5.036470588235294, | |
| "grad_norm": 1.8203125, | |
| "learning_rate": 4.143224330368934e-05, | |
| "loss": 0.7866, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 5.03764705882353, | |
| "grad_norm": 1.8125, | |
| "learning_rate": 4.1358818089888204e-05, | |
| "loss": 0.7738, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 5.038823529411765, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 4.1285145267296634e-05, | |
| "loss": 0.7681, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 4.1211225951034185e-05, | |
| "loss": 0.7145, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 5.041176470588235, | |
| "grad_norm": 1.7109375, | |
| "learning_rate": 4.1137061259951426e-05, | |
| "loss": 0.6369, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 5.04235294117647, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 4.106265231661292e-05, | |
| "loss": 0.6342, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 5.043529411764706, | |
| "grad_norm": 1.7421875, | |
| "learning_rate": 4.0988000247280225e-05, | |
| "loss": 0.6336, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 5.044705882352941, | |
| "grad_norm": 1.6015625, | |
| "learning_rate": 4.0913106181894955e-05, | |
| "loss": 0.4874, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 5.0458823529411765, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 4.083797125406153e-05, | |
| "loss": 0.4932, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.047058823529412, | |
| "grad_norm": 1.703125, | |
| "learning_rate": 4.0762596601030114e-05, | |
| "loss": 0.506, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 5.048235294117647, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 4.068698336367939e-05, | |
| "loss": 0.404, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 5.049411764705883, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 4.061113268649927e-05, | |
| "loss": 0.4039, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 5.050588235294118, | |
| "grad_norm": 1.625, | |
| "learning_rate": 4.0535045717573564e-05, | |
| "loss": 0.3899, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 5.051764705882353, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 4.0458723608562655e-05, | |
| "loss": 0.373, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 5.052941176470588, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 4.0382167514685995e-05, | |
| "loss": 0.375, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 5.054117647058823, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 4.030537859470467e-05, | |
| "loss": 0.3653, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 6.0009411764705884, | |
| "grad_norm": 2.15625, | |
| "learning_rate": 4.022835801090384e-05, | |
| "loss": 0.5957, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 6.002117647058824, | |
| "grad_norm": 1.71875, | |
| "learning_rate": 4.0151106929075175e-05, | |
| "loss": 0.5094, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 6.003294117647059, | |
| "grad_norm": 1.5234375, | |
| "learning_rate": 4.007362651849915e-05, | |
| "loss": 0.4521, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 6.004470588235294, | |
| "grad_norm": 1.46875, | |
| "learning_rate": 3.999591795192743e-05, | |
| "loss": 0.4461, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 6.00564705882353, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 3.9917982405565024e-05, | |
| "loss": 0.4312, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 6.006823529411765, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 3.983982105905257e-05, | |
| "loss": 0.4308, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 6.008, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 3.976143509544843e-05, | |
| "loss": 0.4118, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 6.009176470588235, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 3.968282570121078e-05, | |
| "loss": 0.4189, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 6.01035294117647, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 3.960399406617967e-05, | |
| "loss": 0.4238, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 6.011529411764706, | |
| "grad_norm": 1.7578125, | |
| "learning_rate": 3.952494138355903e-05, | |
| "loss": 0.5081, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 6.012705882352941, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 3.944566884989857e-05, | |
| "loss": 0.489, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 6.0138823529411765, | |
| "grad_norm": 1.6640625, | |
| "learning_rate": 3.936617766507569e-05, | |
| "loss": 0.4868, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 6.015058823529412, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 3.928646903227732e-05, | |
| "loss": 0.4991, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 6.016235294117647, | |
| "grad_norm": 1.8828125, | |
| "learning_rate": 3.920654415798172e-05, | |
| "loss": 0.5644, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 6.017411764705883, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 3.912640425194016e-05, | |
| "loss": 0.614, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 6.018588235294118, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 3.9046050527158716e-05, | |
| "loss": 0.6116, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 6.019764705882353, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 3.896548419987981e-05, | |
| "loss": 0.5977, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 6.020941176470588, | |
| "grad_norm": 1.8046875, | |
| "learning_rate": 3.888470648956381e-05, | |
| "loss": 0.584, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 6.022117647058823, | |
| "grad_norm": 1.890625, | |
| "learning_rate": 3.880371861887067e-05, | |
| "loss": 0.6211, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 6.023294117647059, | |
| "grad_norm": 1.8671875, | |
| "learning_rate": 3.872252181364129e-05, | |
| "loss": 0.6289, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 6.024470588235294, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 3.864111730287906e-05, | |
| "loss": 0.6188, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 6.025647058823529, | |
| "grad_norm": 1.8125, | |
| "learning_rate": 3.855950631873121e-05, | |
| "loss": 0.6316, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 6.0268235294117645, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 3.847769009647019e-05, | |
| "loss": 0.5999, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.028, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 3.8395669874474915e-05, | |
| "loss": 0.5684, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 6.029176470588236, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 3.831344689421211e-05, | |
| "loss": 0.5726, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 6.030352941176471, | |
| "grad_norm": 1.890625, | |
| "learning_rate": 3.8231022400217444e-05, | |
| "loss": 0.5668, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 6.031529411764706, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 3.814839764007673e-05, | |
| "loss": 0.5604, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 6.032705882352941, | |
| "grad_norm": 1.8203125, | |
| "learning_rate": 3.8065573864407e-05, | |
| "loss": 0.5586, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 6.033882352941176, | |
| "grad_norm": 1.796875, | |
| "learning_rate": 3.798255232683765e-05, | |
| "loss": 0.5535, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 6.035058823529412, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 3.7899334283991395e-05, | |
| "loss": 0.5577, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 6.036235294117647, | |
| "grad_norm": 1.84375, | |
| "learning_rate": 3.781592099546528e-05, | |
| "loss": 0.516, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 6.037411764705882, | |
| "grad_norm": 1.8125, | |
| "learning_rate": 3.7732313723811615e-05, | |
| "loss": 0.5001, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 6.038588235294117, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 3.7648513734518856e-05, | |
| "loss": 0.5033, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 6.039764705882353, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 3.756452229599244e-05, | |
| "loss": 0.4786, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 6.0409411764705885, | |
| "grad_norm": 1.7421875, | |
| "learning_rate": 3.748034067953561e-05, | |
| "loss": 0.406, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 6.042117647058824, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 3.7395970159330174e-05, | |
| "loss": 0.3955, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 6.043294117647059, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 3.731141201241719e-05, | |
| "loss": 0.4052, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 6.044470588235294, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 3.7226667518677654e-05, | |
| "loss": 0.3364, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 6.04564705882353, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 3.714173796081314e-05, | |
| "loss": 0.3108, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 6.046823529411765, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 3.705662462432636e-05, | |
| "loss": 0.3072, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 6.048, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 3.697132879750174e-05, | |
| "loss": 0.2657, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 6.049176470588235, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 3.688585177138586e-05, | |
| "loss": 0.2447, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 6.05035294117647, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 3.680019483976799e-05, | |
| "loss": 0.2403, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 6.051529411764706, | |
| "grad_norm": 1.25, | |
| "learning_rate": 3.6714359299160426e-05, | |
| "loss": 0.232, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 6.052705882352941, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 3.662834644877897e-05, | |
| "loss": 0.2306, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 6.0538823529411765, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 3.654215759052314e-05, | |
| "loss": 0.2274, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 7.000705882352941, | |
| "grad_norm": 2.375, | |
| "learning_rate": 3.6455794028956575e-05, | |
| "loss": 0.375, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 7.001882352941177, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 3.636925707128721e-05, | |
| "loss": 0.3472, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 7.003058823529412, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 3.628254802734754e-05, | |
| "loss": 0.2861, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 7.004235294117647, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 3.619566820957477e-05, | |
| "loss": 0.2756, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 7.005411764705882, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 3.610861893299095e-05, | |
| "loss": 0.2651, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 7.006588235294117, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 3.6021401515183094e-05, | |
| "loss": 0.2609, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 7.007764705882353, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 3.5934017276283186e-05, | |
| "loss": 0.254, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 7.0089411764705885, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 3.584646753894828e-05, | |
| "loss": 0.2516, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 7.010117647058824, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 3.575875362834039e-05, | |
| "loss": 0.2477, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 7.011294117647059, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 3.567087687210648e-05, | |
| "loss": 0.3002, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 7.012470588235294, | |
| "grad_norm": 1.46875, | |
| "learning_rate": 3.558283860035839e-05, | |
| "loss": 0.3033, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 7.01364705882353, | |
| "grad_norm": 1.5, | |
| "learning_rate": 3.549464014565265e-05, | |
| "loss": 0.299, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 7.014823529411765, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 3.540628284297033e-05, | |
| "loss": 0.2998, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 7.016, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 3.531776802969686e-05, | |
| "loss": 0.3264, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 7.017176470588235, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 3.522909704560178e-05, | |
| "loss": 0.3858, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 7.01835294117647, | |
| "grad_norm": 1.7578125, | |
| "learning_rate": 3.51402712328184e-05, | |
| "loss": 0.3708, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 7.019529411764706, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 3.505129193582357e-05, | |
| "loss": 0.369, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 7.020705882352941, | |
| "grad_norm": 1.7109375, | |
| "learning_rate": 3.496216050141728e-05, | |
| "loss": 0.3723, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 7.0218823529411765, | |
| "grad_norm": 1.8125, | |
| "learning_rate": 3.48728782787023e-05, | |
| "loss": 0.3906, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 7.023058823529412, | |
| "grad_norm": 1.78125, | |
| "learning_rate": 3.478344661906374e-05, | |
| "loss": 0.3922, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 7.024235294117647, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 3.46938668761486e-05, | |
| "loss": 0.3858, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 7.025411764705883, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 3.460414040584528e-05, | |
| "loss": 0.3885, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 7.026588235294118, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 3.4514268566263075e-05, | |
| "loss": 0.3851, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 7.027764705882353, | |
| "grad_norm": 1.7265625, | |
| "learning_rate": 3.442425271771158e-05, | |
| "loss": 0.367, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 7.028941176470588, | |
| "grad_norm": 1.71875, | |
| "learning_rate": 3.4334094222680175e-05, | |
| "loss": 0.352, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 7.030117647058823, | |
| "grad_norm": 1.8359375, | |
| "learning_rate": 3.4243794445817295e-05, | |
| "loss": 0.348, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 7.031294117647059, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 3.415335475390986e-05, | |
| "loss": 0.3559, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.032470588235294, | |
| "grad_norm": 1.7265625, | |
| "learning_rate": 3.406277651586256e-05, | |
| "loss": 0.3455, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 7.033647058823529, | |
| "grad_norm": 1.625, | |
| "learning_rate": 3.397206110267713e-05, | |
| "loss": 0.3473, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 7.0348235294117645, | |
| "grad_norm": 1.671875, | |
| "learning_rate": 3.388120988743159e-05, | |
| "loss": 0.3312, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 7.036, | |
| "grad_norm": 1.6484375, | |
| "learning_rate": 3.3790224245259494e-05, | |
| "loss": 0.3261, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 7.037176470588236, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 3.369910555332909e-05, | |
| "loss": 0.3102, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 7.038352941176471, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 3.3607855190822466e-05, | |
| "loss": 0.3055, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 7.039529411764706, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 3.351647453891472e-05, | |
| "loss": 0.2965, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 7.040705882352941, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 3.3424964980752996e-05, | |
| "loss": 0.2479, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 7.041882352941176, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 3.333332790143558e-05, | |
| "loss": 0.2448, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 7.043058823529412, | |
| "grad_norm": 1.4375, | |
| "learning_rate": 3.3241564687990956e-05, | |
| "loss": 0.2352, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 7.044235294117647, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 3.3149676729356754e-05, | |
| "loss": 0.2047, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 7.045411764705882, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 3.3057665416358786e-05, | |
| "loss": 0.1881, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 7.046588235294117, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 3.296553214168995e-05, | |
| "loss": 0.1929, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 7.047764705882353, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 3.287327829988919e-05, | |
| "loss": 0.1704, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 7.0489411764705885, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 3.278090528732034e-05, | |
| "loss": 0.1558, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 7.050117647058824, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.268841450215105e-05, | |
| "loss": 0.1584, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 7.051294117647059, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 3.259580734433155e-05, | |
| "loss": 0.1529, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 7.052470588235294, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 3.250308521557351e-05, | |
| "loss": 0.1409, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 7.05364705882353, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 3.241024951932885e-05, | |
| "loss": 0.1419, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 8.000470588235293, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 3.231730166076838e-05, | |
| "loss": 0.2228, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 8.00164705882353, | |
| "grad_norm": 1.6015625, | |
| "learning_rate": 3.2224243046760725e-05, | |
| "loss": 0.2426, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 8.002823529411765, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 3.2131075085850794e-05, | |
| "loss": 0.1876, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 8.004, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 3.203779918823866e-05, | |
| "loss": 0.1648, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 8.005176470588236, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 3.194441676575811e-05, | |
| "loss": 0.1577, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 8.00635294117647, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 3.185092923185532e-05, | |
| "loss": 0.1551, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 8.007529411764706, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.175733800156741e-05, | |
| "loss": 0.1536, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 8.008705882352942, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.1663644491501106e-05, | |
| "loss": 0.1496, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 8.009882352941176, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 3.156985011981121e-05, | |
| "loss": 0.1465, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 8.011058823529412, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 3.14759563061792e-05, | |
| "loss": 0.1675, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 8.012235294117646, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 3.13819644717917e-05, | |
| "loss": 0.1865, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 8.013411764705882, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 3.1287876039319015e-05, | |
| "loss": 0.1816, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 8.014588235294118, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 3.119369243289353e-05, | |
| "loss": 0.1751, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 8.015764705882352, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 3.1099415078088235e-05, | |
| "loss": 0.1863, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 8.016941176470588, | |
| "grad_norm": 1.5859375, | |
| "learning_rate": 3.100504540189507e-05, | |
| "loss": 0.2351, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 8.018117647058823, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 3.091058483270337e-05, | |
| "loss": 0.2202, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 8.019294117647059, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 3.081603480027826e-05, | |
| "loss": 0.2134, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 8.020470588235295, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 3.072139673573895e-05, | |
| "loss": 0.2175, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 8.021647058823529, | |
| "grad_norm": 1.5546875, | |
| "learning_rate": 3.0626672071537156e-05, | |
| "loss": 0.2275, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 8.022823529411765, | |
| "grad_norm": 1.5, | |
| "learning_rate": 3.053186224143533e-05, | |
| "loss": 0.2332, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 8.024, | |
| "grad_norm": 1.5, | |
| "learning_rate": 3.043696868048505e-05, | |
| "loss": 0.2287, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 8.025176470588235, | |
| "grad_norm": 1.5234375, | |
| "learning_rate": 3.0341992825005222e-05, | |
| "loss": 0.2279, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 8.026352941176471, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 3.0246936112560377e-05, | |
| "loss": 0.2246, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 8.027529411764705, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 3.015179998193891e-05, | |
| "loss": 0.2162, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 8.028705882352941, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 3.0056585873131288e-05, | |
| "loss": 0.2078, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 8.029882352941176, | |
| "grad_norm": 1.4375, | |
| "learning_rate": 2.9961295227308295e-05, | |
| "loss": 0.2097, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 8.031058823529412, | |
| "grad_norm": 1.5, | |
| "learning_rate": 2.986592948679914e-05, | |
| "loss": 0.2074, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 8.032235294117648, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 2.977049009506971e-05, | |
| "loss": 0.2107, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 8.033411764705882, | |
| "grad_norm": 1.375, | |
| "learning_rate": 2.9674978496700662e-05, | |
| "loss": 0.2028, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 8.034588235294118, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 2.95793961373656e-05, | |
| "loss": 0.2001, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 8.035764705882354, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 2.9483744463809164e-05, | |
| "loss": 0.1961, | |
| "memory/device_mem_reserved(gib)": 60.75, | |
| "memory/max_mem_active(gib)": 59.81, | |
| "memory/max_mem_allocated(gib)": 59.81, | |
| "step": 4000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 8500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.383389610776986e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |