{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 61200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 2.9754901960784317e-05,
      "loss": 0.2061,
      "step": 500
    },
    {
      "epoch": 0.08,
      "learning_rate": 2.9509803921568626e-05,
      "loss": 0.1319,
      "step": 1000
    },
    {
      "epoch": 0.12,
      "learning_rate": 2.9264705882352942e-05,
      "loss": 0.1099,
      "step": 1500
    },
    {
      "epoch": 0.16,
      "learning_rate": 2.9019607843137255e-05,
      "loss": 0.1096,
      "step": 2000
    },
    {
      "epoch": 0.2,
      "learning_rate": 2.877450980392157e-05,
      "loss": 0.1035,
      "step": 2500
    },
    {
      "epoch": 0.25,
      "learning_rate": 2.8529411764705883e-05,
      "loss": 0.1015,
      "step": 3000
    },
    {
      "epoch": 0.29,
      "learning_rate": 2.8284313725490196e-05,
      "loss": 0.0842,
      "step": 3500
    },
    {
      "epoch": 0.33,
      "learning_rate": 2.8039215686274512e-05,
      "loss": 0.0844,
      "step": 4000
    },
    {
      "epoch": 0.37,
      "learning_rate": 2.7794117647058824e-05,
      "loss": 0.0702,
      "step": 4500
    },
    {
      "epoch": 0.41,
      "learning_rate": 2.7549019607843137e-05,
      "loss": 0.0766,
      "step": 5000
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.730392156862745e-05,
      "loss": 0.0811,
      "step": 5500
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.7058823529411766e-05,
      "loss": 0.072,
      "step": 6000
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.6813725490196082e-05,
      "loss": 0.0667,
      "step": 6500
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.656862745098039e-05,
      "loss": 0.0753,
      "step": 7000
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.6323529411764707e-05,
      "loss": 0.0689,
      "step": 7500
    },
    {
      "epoch": 0.65,
      "learning_rate": 2.607843137254902e-05,
      "loss": 0.0614,
      "step": 8000
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.5833333333333336e-05,
      "loss": 0.0643,
      "step": 8500
    },
    {
      "epoch": 0.74,
      "learning_rate": 2.5588235294117648e-05,
      "loss": 0.0687,
      "step": 9000
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.534313725490196e-05,
      "loss": 0.0674,
      "step": 9500
    },
    {
      "epoch": 0.82,
      "learning_rate": 2.5098039215686277e-05,
      "loss": 0.0588,
      "step": 10000
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.485294117647059e-05,
      "loss": 0.0676,
      "step": 10500
    },
    {
      "epoch": 0.9,
      "learning_rate": 2.4607843137254902e-05,
      "loss": 0.0657,
      "step": 11000
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.4362745098039215e-05,
      "loss": 0.0595,
      "step": 11500
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.411764705882353e-05,
      "loss": 0.0641,
      "step": 12000
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.3872549019607843e-05,
      "loss": 0.0573,
      "step": 12500
    },
    {
      "epoch": 1.06,
      "learning_rate": 2.3627450980392156e-05,
      "loss": 0.0446,
      "step": 13000
    },
    {
      "epoch": 1.1,
      "learning_rate": 2.3382352941176472e-05,
      "loss": 0.0403,
      "step": 13500
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.3137254901960785e-05,
      "loss": 0.0389,
      "step": 14000
    },
    {
      "epoch": 1.18,
      "learning_rate": 2.2892156862745097e-05,
      "loss": 0.0438,
      "step": 14500
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.2647058823529413e-05,
      "loss": 0.0439,
      "step": 15000
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.2401960784313726e-05,
      "loss": 0.0495,
      "step": 15500
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.2156862745098042e-05,
      "loss": 0.047,
      "step": 16000
    },
    {
      "epoch": 1.35,
      "learning_rate": 2.191176470588235e-05,
      "loss": 0.0501,
      "step": 16500
    },
    {
      "epoch": 1.39,
      "learning_rate": 2.1666666666666667e-05,
      "loss": 0.0517,
      "step": 17000
    },
    {
      "epoch": 1.43,
      "learning_rate": 2.1421568627450983e-05,
      "loss": 0.0452,
      "step": 17500
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.1176470588235296e-05,
      "loss": 0.0459,
      "step": 18000
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.0931372549019608e-05,
      "loss": 0.0451,
      "step": 18500
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.068627450980392e-05,
      "loss": 0.0418,
      "step": 19000
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.0441176470588237e-05,
      "loss": 0.0446,
      "step": 19500
    },
    {
      "epoch": 1.63,
      "learning_rate": 2.019607843137255e-05,
      "loss": 0.0478,
      "step": 20000
    },
    {
      "epoch": 1.67,
      "learning_rate": 1.9950980392156862e-05,
      "loss": 0.0387,
      "step": 20500
    },
    {
      "epoch": 1.72,
      "learning_rate": 1.9705882352941178e-05,
      "loss": 0.04,
      "step": 21000
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.946078431372549e-05,
      "loss": 0.048,
      "step": 21500
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.9215686274509803e-05,
      "loss": 0.0382,
      "step": 22000
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.8970588235294116e-05,
      "loss": 0.0402,
      "step": 22500
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.8725490196078432e-05,
      "loss": 0.0409,
      "step": 23000
    },
    {
      "epoch": 1.92,
      "learning_rate": 1.8480392156862748e-05,
      "loss": 0.0403,
      "step": 23500
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.8235294117647057e-05,
      "loss": 0.0577,
      "step": 24000
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.7990196078431373e-05,
      "loss": 0.0434,
      "step": 24500
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.7745098039215686e-05,
      "loss": 0.0304,
      "step": 25000
    },
    {
      "epoch": 2.08,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 0.0227,
      "step": 25500
    },
    {
      "epoch": 2.12,
      "learning_rate": 1.7254901960784314e-05,
      "loss": 0.0307,
      "step": 26000
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.7009803921568627e-05,
      "loss": 0.0266,
      "step": 26500
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.6764705882352943e-05,
      "loss": 0.0319,
      "step": 27000
    },
    {
      "epoch": 2.25,
      "learning_rate": 1.6519607843137256e-05,
      "loss": 0.0324,
      "step": 27500
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.627450980392157e-05,
      "loss": 0.0256,
      "step": 28000
    },
    {
      "epoch": 2.33,
      "learning_rate": 1.602941176470588e-05,
      "loss": 0.0292,
      "step": 28500
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.5784313725490197e-05,
      "loss": 0.0284,
      "step": 29000
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.5539215686274513e-05,
      "loss": 0.0307,
      "step": 29500
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.5294117647058822e-05,
      "loss": 0.0333,
      "step": 30000
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.5049019607843138e-05,
      "loss": 0.0316,
      "step": 30500
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.4803921568627453e-05,
      "loss": 0.0298,
      "step": 31000
    },
    {
      "epoch": 2.57,
      "learning_rate": 1.4558823529411765e-05,
      "loss": 0.0255,
      "step": 31500
    },
    {
      "epoch": 2.61,
      "learning_rate": 1.431372549019608e-05,
      "loss": 0.0274,
      "step": 32000
    },
    {
      "epoch": 2.66,
      "learning_rate": 1.4068627450980392e-05,
      "loss": 0.0268,
      "step": 32500
    },
    {
      "epoch": 2.7,
      "learning_rate": 1.3823529411764705e-05,
      "loss": 0.0263,
      "step": 33000
    },
    {
      "epoch": 2.74,
      "learning_rate": 1.357843137254902e-05,
      "loss": 0.0208,
      "step": 33500
    },
    {
      "epoch": 2.78,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.0272,
      "step": 34000
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.3088235294117648e-05,
      "loss": 0.0245,
      "step": 34500
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.284313725490196e-05,
      "loss": 0.0245,
      "step": 35000
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.2598039215686275e-05,
      "loss": 0.0239,
      "step": 35500
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.2352941176470587e-05,
      "loss": 0.0271,
      "step": 36000
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.2107843137254903e-05,
      "loss": 0.0226,
      "step": 36500
    },
    {
      "epoch": 3.02,
      "learning_rate": 1.1862745098039216e-05,
      "loss": 0.0216,
      "step": 37000
    },
    {
      "epoch": 3.06,
      "learning_rate": 1.161764705882353e-05,
      "loss": 0.0157,
      "step": 37500
    },
    {
      "epoch": 3.1,
      "learning_rate": 1.1372549019607843e-05,
      "loss": 0.0137,
      "step": 38000
    },
    {
      "epoch": 3.15,
      "learning_rate": 1.1127450980392157e-05,
      "loss": 0.0122,
      "step": 38500
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.0882352941176471e-05,
      "loss": 0.0153,
      "step": 39000
    },
    {
      "epoch": 3.23,
      "learning_rate": 1.0637254901960786e-05,
      "loss": 0.018,
      "step": 39500
    },
    {
      "epoch": 3.27,
      "learning_rate": 1.0392156862745098e-05,
      "loss": 0.018,
      "step": 40000
    },
    {
      "epoch": 3.31,
      "learning_rate": 1.0147058823529413e-05,
      "loss": 0.0169,
      "step": 40500
    },
    {
      "epoch": 3.35,
      "learning_rate": 9.901960784313725e-06,
      "loss": 0.0138,
      "step": 41000
    },
    {
      "epoch": 3.39,
      "learning_rate": 9.65686274509804e-06,
      "loss": 0.0171,
      "step": 41500
    },
    {
      "epoch": 3.43,
      "learning_rate": 9.411764705882354e-06,
      "loss": 0.0139,
      "step": 42000
    },
    {
      "epoch": 3.47,
      "learning_rate": 9.166666666666668e-06,
      "loss": 0.0123,
      "step": 42500
    },
    {
      "epoch": 3.51,
      "learning_rate": 8.92156862745098e-06,
      "loss": 0.0172,
      "step": 43000
    },
    {
      "epoch": 3.55,
      "learning_rate": 8.676470588235295e-06,
      "loss": 0.0168,
      "step": 43500
    },
    {
      "epoch": 3.59,
      "learning_rate": 8.431372549019608e-06,
      "loss": 0.0146,
      "step": 44000
    },
    {
      "epoch": 3.64,
      "learning_rate": 8.18627450980392e-06,
      "loss": 0.0155,
      "step": 44500
    },
    {
      "epoch": 3.68,
      "learning_rate": 7.941176470588236e-06,
      "loss": 0.0148,
      "step": 45000
    },
    {
      "epoch": 3.72,
      "learning_rate": 7.696078431372549e-06,
      "loss": 0.0187,
      "step": 45500
    },
    {
      "epoch": 3.76,
      "learning_rate": 7.450980392156863e-06,
      "loss": 0.0124,
      "step": 46000
    },
    {
      "epoch": 3.8,
      "learning_rate": 7.205882352941177e-06,
      "loss": 0.0119,
      "step": 46500
    },
    {
      "epoch": 3.84,
      "learning_rate": 6.960784313725491e-06,
      "loss": 0.0125,
      "step": 47000
    },
    {
      "epoch": 3.88,
      "learning_rate": 6.7156862745098045e-06,
      "loss": 0.0157,
      "step": 47500
    },
    {
      "epoch": 3.92,
      "learning_rate": 6.470588235294118e-06,
      "loss": 0.0149,
      "step": 48000
    },
    {
      "epoch": 3.96,
      "learning_rate": 6.225490196078432e-06,
      "loss": 0.0179,
      "step": 48500
    },
    {
      "epoch": 4.0,
      "learning_rate": 5.980392156862746e-06,
      "loss": 0.0142,
      "step": 49000
    },
    {
      "epoch": 4.04,
      "learning_rate": 5.735294117647058e-06,
      "loss": 0.0111,
      "step": 49500
    },
    {
      "epoch": 4.08,
      "learning_rate": 5.490196078431373e-06,
      "loss": 0.0056,
      "step": 50000
    },
    {
      "epoch": 4.13,
      "learning_rate": 5.245098039215686e-06,
      "loss": 0.0051,
      "step": 50500
    },
    {
      "epoch": 4.17,
      "learning_rate": 4.9999999999999996e-06,
      "loss": 0.0101,
      "step": 51000
    },
    {
      "epoch": 4.21,
      "learning_rate": 4.754901960784314e-06,
      "loss": 0.0069,
      "step": 51500
    },
    {
      "epoch": 4.25,
      "learning_rate": 4.509803921568627e-06,
      "loss": 0.0069,
      "step": 52000
    },
    {
      "epoch": 4.29,
      "learning_rate": 4.264705882352941e-06,
      "loss": 0.0091,
      "step": 52500
    },
    {
      "epoch": 4.33,
      "learning_rate": 4.019607843137255e-06,
      "loss": 0.0068,
      "step": 53000
    },
    {
      "epoch": 4.37,
      "learning_rate": 3.7745098039215686e-06,
      "loss": 0.0079,
      "step": 53500
    },
    {
      "epoch": 4.41,
      "learning_rate": 3.5294117647058825e-06,
      "loss": 0.0139,
      "step": 54000
    },
    {
      "epoch": 4.45,
      "learning_rate": 3.2843137254901964e-06,
      "loss": 0.0051,
      "step": 54500
    },
    {
      "epoch": 4.49,
      "learning_rate": 3.03921568627451e-06,
      "loss": 0.0104,
      "step": 55000
    },
    {
      "epoch": 4.53,
      "learning_rate": 2.7941176470588237e-06,
      "loss": 0.0068,
      "step": 55500
    },
    {
      "epoch": 4.58,
      "learning_rate": 2.5490196078431376e-06,
      "loss": 0.0102,
      "step": 56000
    },
    {
      "epoch": 4.62,
      "learning_rate": 2.303921568627451e-06,
      "loss": 0.0107,
      "step": 56500
    },
    {
      "epoch": 4.66,
      "learning_rate": 2.058823529411765e-06,
      "loss": 0.0093,
      "step": 57000
    },
    {
      "epoch": 4.7,
      "learning_rate": 1.8137254901960784e-06,
      "loss": 0.0059,
      "step": 57500
    },
    {
      "epoch": 4.74,
      "learning_rate": 1.5686274509803923e-06,
      "loss": 0.0077,
      "step": 58000
    },
    {
      "epoch": 4.78,
      "learning_rate": 1.323529411764706e-06,
      "loss": 0.0074,
      "step": 58500
    },
    {
      "epoch": 4.82,
      "learning_rate": 1.0784313725490197e-06,
      "loss": 0.0076,
      "step": 59000
    },
    {
      "epoch": 4.86,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.0062,
      "step": 59500
    },
    {
      "epoch": 4.9,
      "learning_rate": 5.882352941176471e-07,
      "loss": 0.0073,
      "step": 60000
    },
    {
      "epoch": 4.94,
      "learning_rate": 3.431372549019608e-07,
      "loss": 0.0088,
      "step": 60500
    },
    {
      "epoch": 4.98,
      "learning_rate": 9.803921568627452e-08,
      "loss": 0.0053,
      "step": 61000
    },
    {
      "epoch": 5.0,
      "step": 61200,
      "total_flos": 2.401553971865549e+16,
      "train_loss": 0.0358083400544193,
      "train_runtime": 10226.935,
      "train_samples_per_second": 23.937,
      "train_steps_per_second": 5.984
    }
  ],
  "max_steps": 61200,
  "num_train_epochs": 5,
  "total_flos": 2.401553971865549e+16,
  "trial_name": null,
  "trial_params": null
}