Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN,
"... is not valid JSON
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 102000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.014705882352941176, | |
| "grad_norm": 2.769134283065796, | |
| "learning_rate": 4.975490196078432e-05, | |
| "loss": 2.0395, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.029411764705882353, | |
| "grad_norm": 3.531428813934326, | |
| "learning_rate": 4.9509803921568634e-05, | |
| "loss": 1.5943, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 1000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.04411764705882353, | |
| "grad_norm": 7.56320858001709, | |
| "learning_rate": 4.9264705882352944e-05, | |
| "loss": 1.4979, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 1500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.058823529411764705, | |
| "grad_norm": 5.256613254547119, | |
| "learning_rate": 4.901960784313725e-05, | |
| "loss": 1.3957, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 2000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.07352941176470588, | |
| "grad_norm": 4.642207145690918, | |
| "learning_rate": 4.877450980392157e-05, | |
| "loss": 1.3589, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 2500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.08823529411764706, | |
| "grad_norm": 3.366818904876709, | |
| "learning_rate": 4.8529411764705885e-05, | |
| "loss": 1.3006, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 3000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.10294117647058823, | |
| "grad_norm": 4.411001205444336, | |
| "learning_rate": 4.82843137254902e-05, | |
| "loss": 1.2582, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 3500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 5.188011646270752, | |
| "learning_rate": 4.803921568627452e-05, | |
| "loss": 1.1948, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 4000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.1323529411764706, | |
| "grad_norm": 3.674046754837036, | |
| "learning_rate": 4.7794117647058826e-05, | |
| "loss": 1.2398, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 4500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.14705882352941177, | |
| "grad_norm": 6.163153171539307, | |
| "learning_rate": 4.7549019607843135e-05, | |
| "loss": 1.2257, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 5000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.16176470588235295, | |
| "grad_norm": 5.589200019836426, | |
| "learning_rate": 4.730392156862745e-05, | |
| "loss": 1.1747, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 5500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.17647058823529413, | |
| "grad_norm": 5.379985332489014, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 1.1922, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 6000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.19117647058823528, | |
| "grad_norm": 1.5078221559524536, | |
| "learning_rate": 4.681372549019608e-05, | |
| "loss": 1.1355, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 6500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.20588235294117646, | |
| "grad_norm": 2.732482433319092, | |
| "learning_rate": 4.656862745098039e-05, | |
| "loss": 1.1359, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 7000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.22058823529411764, | |
| "grad_norm": 5.508218765258789, | |
| "learning_rate": 4.632352941176471e-05, | |
| "loss": 1.0726, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 7500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 6.273388385772705, | |
| "learning_rate": 4.607843137254902e-05, | |
| "loss": 1.0777, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 8000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 7.923452854156494, | |
| "learning_rate": 4.5833333333333334e-05, | |
| "loss": 1.0869, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 8500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.2647058823529412, | |
| "grad_norm": 4.915501117706299, | |
| "learning_rate": 4.558823529411765e-05, | |
| "loss": 1.0897, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 9000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.27941176470588236, | |
| "grad_norm": 4.274153232574463, | |
| "learning_rate": 4.5343137254901966e-05, | |
| "loss": 1.0816, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 9500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 6.414577960968018, | |
| "learning_rate": 4.5098039215686275e-05, | |
| "loss": 1.0721, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 10000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.3088235294117647, | |
| "grad_norm": 5.6046342849731445, | |
| "learning_rate": 4.485294117647059e-05, | |
| "loss": 1.052, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 10500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.3235294117647059, | |
| "grad_norm": 6.429250717163086, | |
| "learning_rate": 4.460784313725491e-05, | |
| "loss": 0.9923, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 11000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.3382352941176471, | |
| "grad_norm": 3.2989513874053955, | |
| "learning_rate": 4.4362745098039216e-05, | |
| "loss": 1.0657, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 11500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 6.195382118225098, | |
| "learning_rate": 4.411764705882353e-05, | |
| "loss": 1.0001, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 12000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.36764705882352944, | |
| "grad_norm": 2.9487133026123047, | |
| "learning_rate": 4.387254901960784e-05, | |
| "loss": 1.0021, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 12500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.38235294117647056, | |
| "grad_norm": 7.185413837432861, | |
| "learning_rate": 4.362745098039216e-05, | |
| "loss": 1.0065, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 13000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.39705882352941174, | |
| "grad_norm": 5.732925891876221, | |
| "learning_rate": 4.3382352941176474e-05, | |
| "loss": 1.0212, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 13500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.4117647058823529, | |
| "grad_norm": 6.557607650756836, | |
| "learning_rate": 4.313725490196079e-05, | |
| "loss": 0.986, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 14000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.4264705882352941, | |
| "grad_norm": 4.9897613525390625, | |
| "learning_rate": 4.28921568627451e-05, | |
| "loss": 0.977, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 14500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.4411764705882353, | |
| "grad_norm": 2.1532859802246094, | |
| "learning_rate": 4.2647058823529415e-05, | |
| "loss": 0.9729, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 15000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.45588235294117646, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.2401960784313724e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 15500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.215686274509804e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 16000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.4852941176470588, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.1911764705882356e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 16500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 17000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.5147058823529411, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.142156862745099e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 17500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.5294117647058824, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.11764705882353e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 18000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.5441176470588235, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.0931372549019607e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 18500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.5588235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.068627450980392e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 19000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.5735294117647058, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.044117647058824e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 19500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.0196078431372555e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 20000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.6029411764705882, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.9950980392156864e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 20500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.6176470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.970588235294117e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 21000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.6323529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.946078431372549e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 21500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.6470588235294118, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 22000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.6617647058823529, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.897058823529412e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 22500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.6764705882352942, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.872549019607844e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 23000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.6911764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.8480392156862746e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 23500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.8235294117647055e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 24000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.7205882352941176, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.799019607843137e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 24500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.7352941176470589, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.774509803921569e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 25000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 25500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.7647058823529411, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.725490196078432e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 26000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.7794117647058824, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.700980392156863e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 26500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.7941176470588235, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.6764705882352945e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 27000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.8088235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.6519607843137254e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 27500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.627450980392157e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 28000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.8382352941176471, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.6029411764705886e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 28500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.8529411764705882, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.5784313725490195e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 29000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.8676470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.553921568627451e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 29500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 30000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.8970588235294118, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.5049019607843136e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 30500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.9117647058823529, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.480392156862745e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 31000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.9264705882352942, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.455882352941177e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 31500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.431372549019608e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 32000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.9558823529411765, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.4068627450980394e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 32500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.9705882352941176, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.382352941176471e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 33000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 0.9852941176470589, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.357843137254902e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 33500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 34000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.0147058823529411, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.308823529411765e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 34500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.0294117647058822, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.284313725490196e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 35000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.0441176470588236, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.2598039215686276e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 35500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.0588235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.235294117647059e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 36000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.0735294117647058, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.210784313725491e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 36500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.088235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.186274509803922e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 37000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.1029411764705883, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.161764705882353e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 37500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.1176470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.137254901960784e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 38000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.1323529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.112745098039216e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 38500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.1470588235294117, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.0882352941176475e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 39000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.161764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.063725490196079e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 39500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.0392156862745097e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 40000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.1911764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.0147058823529413e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 40500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.2058823529411764, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.9901960784313725e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 41000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.2205882352941178, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.965686274509804e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 41500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.2352941176470589, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.9411764705882354e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 42000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.916666666666667e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 42500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.2647058823529411, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.8921568627450986e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 43000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.2794117647058822, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.8676470588235295e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 43500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.2941176470588236, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.8431372549019608e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 44000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.3088235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.8186274509803924e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 44500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.3235294117647058, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.7941176470588236e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 45000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.3382352941176472, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.7696078431372552e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 45500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.3529411764705883, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.7450980392156865e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 46000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.3676470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.7205882352941174e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 46500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.3823529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.696078431372549e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 47000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.3970588235294117, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.6715686274509806e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 47500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.647058823529412e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 48000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.4264705882352942, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.6225490196078435e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 48500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.4411764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.5980392156862747e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 49000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.4558823529411764, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.5735294117647057e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 49500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.5490196078431373e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 50000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.4852941176470589, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.5245098039215685e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 50500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.5e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 51000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.5147058823529411, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.4754901960784317e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 51500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.5294117647058822, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.4509803921568626e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 52000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.5441176470588234, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.4264705882352942e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 52500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.5588235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.401960784313726e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 53000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.5735294117647058, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.3774509803921568e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 53500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.5882352941176472, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.3529411764705884e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 54000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.6029411764705883, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.3284313725490196e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 54500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.6176470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.303921568627451e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 55000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.6323529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.2794117647058825e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 55500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.6470588235294117, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.2549019607843138e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 56000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.6617647058823528, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.2303921568627454e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 56500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.6764705882352942, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.2058823529411766e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 57000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.6911764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.181372549019608e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 57500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.7058823529411766, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.1568627450980395e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 58000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.7205882352941178, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.1323529411764707e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 58500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.7352941176470589, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.107843137254902e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 59000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 59500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.058823529411765e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 60000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.7794117647058822, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.034313725490196e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 60500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.7941176470588234, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.0098039215686277e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 61000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.8088235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.9852941176470586e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 61500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.8235294117647058, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.9607843137254903e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 62000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.8382352941176472, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.936274509803922e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 62500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.8529411764705883, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.9117647058823528e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 63000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.8676470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.8872549019607844e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 63500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.862745098039216e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 64000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.8970588235294117, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.8382352941176472e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 64500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.9117647058823528, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.8137254901960785e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 65000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.9264705882352942, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.7892156862745098e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 65500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.9411764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.7647058823529414e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 66000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.9558823529411766, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.7401960784313726e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 66500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.9705882352941178, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.715686274509804e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 67000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 1.9852941176470589, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.6911764705882355e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 67500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 68000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.014705882352941, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.642156862745098e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 68500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.0294117647058822, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.6176470588235296e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 69000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.0441176470588234, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.593137254901961e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 69500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.0588235294117645, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.568627450980392e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 70000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.073529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.5441176470588237e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 70500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.088235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.5196078431372548e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 71000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.1029411764705883, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.4950980392156863e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 71500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.4705882352941177e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 72000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.1323529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.4460784313725493e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 72500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.1470588235294117, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.4215686274509804e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 73000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.161764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.3970588235294118e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 73500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.176470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.3725490196078432e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 74000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.1911764705882355, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.3480392156862745e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 74500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.2058823529411766, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.323529411764706e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 75000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.2205882352941178, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.2990196078431374e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 75500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.235294117647059, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.2745098039215686e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 76000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.25e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 76500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.264705882352941, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.2254901960784313e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 77000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.2794117647058822, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.200980392156863e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 77500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.2941176470588234, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.1764705882352942e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 78000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.3088235294117645, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.1519607843137254e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 78500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.323529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.1274509803921569e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 79000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.338235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.1029411764705883e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 79500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.0784313725490197e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 80000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.3676470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.053921568627451e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 80500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.3823529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.0294117647058824e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 81000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.3970588235294117, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.0049019607843139e-05, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 81500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.411764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 9.803921568627451e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 82000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.426470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 9.558823529411764e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 82500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.4411764705882355, | |
| "grad_norm": NaN, | |
| "learning_rate": 9.31372549019608e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 83000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.4558823529411766, | |
| "grad_norm": NaN, | |
| "learning_rate": 9.068627450980392e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 83500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.4705882352941178, | |
| "grad_norm": NaN, | |
| "learning_rate": 8.823529411764707e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 84000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.485294117647059, | |
| "grad_norm": NaN, | |
| "learning_rate": 8.57843137254902e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 84500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": NaN, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 85000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.514705882352941, | |
| "grad_norm": NaN, | |
| "learning_rate": 8.088235294117648e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 85500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.5294117647058822, | |
| "grad_norm": NaN, | |
| "learning_rate": 7.84313725490196e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 86000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.5441176470588234, | |
| "grad_norm": NaN, | |
| "learning_rate": 7.598039215686274e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 86500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.5588235294117645, | |
| "grad_norm": NaN, | |
| "learning_rate": 7.3529411764705884e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 87000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.5735294117647056, | |
| "grad_norm": NaN, | |
| "learning_rate": 7.107843137254902e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 87500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.588235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 6.862745098039216e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 88000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.6029411764705883, | |
| "grad_norm": NaN, | |
| "learning_rate": 6.61764705882353e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 88500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.6176470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 6.372549019607843e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 89000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.6323529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 6.127450980392157e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 89500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.6470588235294117, | |
| "grad_norm": NaN, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 90000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.661764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 5.637254901960784e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 90500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.6764705882352944, | |
| "grad_norm": NaN, | |
| "learning_rate": 5.392156862745099e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 91000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.6911764705882355, | |
| "grad_norm": NaN, | |
| "learning_rate": 5.147058823529412e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 91500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.7058823529411766, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.901960784313726e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 92000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.7205882352941178, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.65686274509804e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 92500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.735294117647059, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.411764705882353e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 93000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 93500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.764705882352941, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.92156862745098e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 94000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.7794117647058822, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.6764705882352942e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 94500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.7941176470588234, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.431372549019608e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 95000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.8088235294117645, | |
| "grad_norm": NaN, | |
| "learning_rate": 3.1862745098039216e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 95500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.8235294117647056, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 96000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.838235294117647, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.6960784313725493e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 96500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.8529411764705883, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.450980392156863e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 97000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.8676470588235294, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.2058823529411767e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 97500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.8823529411764706, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.96078431372549e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 98000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.8970588235294117, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.715686274509804e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 98500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.911764705882353, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 99000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.9264705882352944, | |
| "grad_norm": NaN, | |
| "learning_rate": 1.2254901960784314e-06, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 99500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": NaN, | |
| "learning_rate": 9.80392156862745e-07, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 100000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.9558823529411766, | |
| "grad_norm": NaN, | |
| "learning_rate": 7.352941176470589e-07, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 100500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.9705882352941178, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.901960784313725e-07, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 101000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 2.985294117647059, | |
| "grad_norm": NaN, | |
| "learning_rate": 2.4509803921568627e-07, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 101500, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": NaN, | |
| "learning_rate": 0.0, | |
| "loss": NaN, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 102000, | |
| "total_memory_available (GB)": 126.62 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "max_memory_allocated (GB)": 2.1, | |
| "memory_allocated (GB)": 1.46, | |
| "step": 102000, | |
| "total_flos": 6.324139790696448e+19, | |
| "total_memory_available (GB)": 126.62, | |
| "train_loss": NaN, | |
| "train_runtime": 4358.6451, | |
| "train_samples_per_second": 187.213, | |
| "train_steps_per_second": 23.402 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 102000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.324139790696448e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |