{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 5.333333333333333e-05, "loss": 0.8383, "step": 2 }, { "epoch": 0.27, "learning_rate": 0.00010666666666666667, "loss": 0.8509, "step": 4 }, { "epoch": 0.4, "learning_rate": 0.00016, "loss": 0.8373, "step": 6 }, { "epoch": 0.53, "learning_rate": 0.00021333333333333333, "loss": 0.8067, "step": 8 }, { "epoch": 0.67, "learning_rate": 0.0002666666666666667, "loss": 0.5774, "step": 10 }, { "epoch": 0.8, "learning_rate": 0.00032, "loss": 0.5267, "step": 12 }, { "epoch": 0.93, "learning_rate": 0.0003733333333333334, "loss": 0.551, "step": 14 }, { "epoch": 1.07, "learning_rate": 0.00039703703703703705, "loss": 0.4659, "step": 16 }, { "epoch": 1.2, "learning_rate": 0.0003911111111111111, "loss": 0.4995, "step": 18 }, { "epoch": 1.33, "learning_rate": 0.0003851851851851852, "loss": 0.5013, "step": 20 }, { "epoch": 1.47, "learning_rate": 0.0003792592592592593, "loss": 0.3762, "step": 22 }, { "epoch": 1.6, "learning_rate": 0.0003733333333333334, "loss": 0.3964, "step": 24 }, { "epoch": 1.73, "learning_rate": 0.00036740740740740744, "loss": 0.4134, "step": 26 }, { "epoch": 1.87, "learning_rate": 0.0003614814814814815, "loss": 0.3222, "step": 28 }, { "epoch": 2.0, "learning_rate": 0.00035555555555555557, "loss": 0.2872, "step": 30 }, { "epoch": 2.13, "learning_rate": 0.00034962962962962964, "loss": 0.3529, "step": 32 }, { "epoch": 2.27, "learning_rate": 0.0003437037037037037, "loss": 0.282, "step": 34 }, { "epoch": 2.4, "learning_rate": 0.00033777777777777777, "loss": 0.2544, "step": 36 }, { "epoch": 2.53, "learning_rate": 0.00033185185185185184, "loss": 0.212, "step": 38 }, { "epoch": 2.67, "learning_rate": 0.0003259259259259259, "loss": 0.1252, "step": 40 }, { "epoch": 2.8, "learning_rate": 0.00032, "loss": 0.1933, "step": 42 }, { "epoch": 2.93, "learning_rate": 0.0003140740740740741, "loss": 0.1614, "step": 44 }, { "epoch": 3.07, "learning_rate": 0.00030814814814814816, "loss": 0.1694, "step": 46 }, { "epoch": 3.2, "learning_rate": 0.0003022222222222222, "loss": 0.1645, "step": 48 }, { "epoch": 3.33, "learning_rate": 0.0002962962962962963, "loss": 0.1532, "step": 50 }, { "epoch": 3.47, "learning_rate": 0.0002903703703703704, "loss": 0.1115, "step": 52 }, { "epoch": 3.6, "learning_rate": 0.0002844444444444445, "loss": 0.0873, "step": 54 }, { "epoch": 3.73, "learning_rate": 0.00027851851851851855, "loss": 0.1409, "step": 56 }, { "epoch": 3.87, "learning_rate": 0.0002725925925925926, "loss": 0.1363, "step": 58 }, { "epoch": 4.0, "learning_rate": 0.0002666666666666667, "loss": 0.12, "step": 60 }, { "epoch": 4.13, "learning_rate": 0.0002607407407407408, "loss": 0.0858, "step": 62 }, { "epoch": 4.27, "learning_rate": 0.0002548148148148148, "loss": 0.1151, "step": 64 }, { "epoch": 4.4, "learning_rate": 0.0002488888888888889, "loss": 0.0631, "step": 66 }, { "epoch": 4.53, "learning_rate": 0.00024296296296296297, "loss": 0.1468, "step": 68 }, { "epoch": 4.67, "learning_rate": 0.00023703703703703704, "loss": 0.1032, "step": 70 }, { "epoch": 4.8, "learning_rate": 0.0002311111111111111, "loss": 0.0626, "step": 72 }, { "epoch": 4.93, "learning_rate": 0.0002251851851851852, "loss": 0.0551, "step": 74 }, { "epoch": 5.07, "learning_rate": 0.00021925925925925927, "loss": 0.0717, "step": 76 }, { "epoch": 5.2, "learning_rate": 0.00021333333333333333, "loss": 0.0711, "step": 78 }, { "epoch": 5.33, "learning_rate": 0.0002074074074074074, "loss": 0.0717, "step": 80 }, { "epoch": 5.47, "learning_rate": 0.00020148148148148147, "loss": 0.0886, "step": 82 }, { "epoch": 5.6, "learning_rate": 0.00019555555555555556, "loss": 0.0527, "step": 84 }, { "epoch": 5.73, "learning_rate": 0.00018962962962962965, "loss": 0.0633, "step": 86 }, { "epoch": 5.87, "learning_rate": 0.00018370370370370372, "loss": 0.0424, "step": 88 }, { "epoch": 6.0, "learning_rate": 0.00017777777777777779, "loss": 0.0431, "step": 90 }, { "epoch": 6.13, "learning_rate": 0.00017185185185185185, "loss": 0.0538, "step": 92 }, { "epoch": 6.27, "learning_rate": 0.00016592592592592592, "loss": 0.0526, "step": 94 }, { "epoch": 6.4, "learning_rate": 0.00016, "loss": 0.055, "step": 96 }, { "epoch": 6.53, "learning_rate": 0.00015407407407407408, "loss": 0.0518, "step": 98 }, { "epoch": 6.67, "learning_rate": 0.00014814814814814815, "loss": 0.0484, "step": 100 }, { "epoch": 6.8, "learning_rate": 0.00014222222222222224, "loss": 0.0526, "step": 102 }, { "epoch": 6.93, "learning_rate": 0.0001362962962962963, "loss": 0.0544, "step": 104 }, { "epoch": 7.07, "learning_rate": 0.0001303703703703704, "loss": 0.036, "step": 106 }, { "epoch": 7.2, "learning_rate": 0.00012444444444444444, "loss": 0.0475, "step": 108 }, { "epoch": 7.33, "learning_rate": 0.00011851851851851852, "loss": 0.0466, "step": 110 }, { "epoch": 7.47, "learning_rate": 0.0001125925925925926, "loss": 0.0416, "step": 112 }, { "epoch": 7.6, "learning_rate": 0.00010666666666666667, "loss": 0.0387, "step": 114 }, { "epoch": 7.73, "learning_rate": 0.00010074074074074073, "loss": 0.0494, "step": 116 }, { "epoch": 7.87, "learning_rate": 9.481481481481483e-05, "loss": 0.0451, "step": 118 }, { "epoch": 8.0, "learning_rate": 8.888888888888889e-05, "loss": 0.0348, "step": 120 }, { "epoch": 8.13, "learning_rate": 8.296296296296296e-05, "loss": 0.0487, "step": 122 }, { "epoch": 8.27, "learning_rate": 7.703703703703704e-05, "loss": 0.0331, "step": 124 }, { "epoch": 8.4, "learning_rate": 7.111111111111112e-05, "loss": 0.0261, "step": 126 }, { "epoch": 8.53, "learning_rate": 6.51851851851852e-05, "loss": 0.0353, "step": 128 }, { "epoch": 8.67, "learning_rate": 5.925925925925926e-05, "loss": 0.0548, "step": 130 }, { "epoch": 8.8, "learning_rate": 5.333333333333333e-05, "loss": 0.0324, "step": 132 }, { "epoch": 8.93, "learning_rate": 4.740740740740741e-05, "loss": 0.0362, "step": 134 }, { "epoch": 9.07, "learning_rate": 4.148148148148148e-05, "loss": 0.0258, "step": 136 }, { "epoch": 9.2, "learning_rate": 3.555555555555556e-05, "loss": 0.0328, "step": 138 }, { "epoch": 9.33, "learning_rate": 2.962962962962963e-05, "loss": 0.0325, "step": 140 }, { "epoch": 9.47, "learning_rate": 2.3703703703703707e-05, "loss": 0.0245, "step": 142 }, { "epoch": 9.6, "learning_rate": 1.777777777777778e-05, "loss": 0.0294, "step": 144 }, { "epoch": 9.73, "learning_rate": 1.1851851851851853e-05, "loss": 0.0439, "step": 146 }, { "epoch": 9.87, "learning_rate": 5.925925925925927e-06, "loss": 0.0412, "step": 148 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.0352, "step": 150 } ], "max_steps": 150, "num_train_epochs": 10, "total_flos": 2.407321351225344e+16, "trial_name": null, "trial_params": null }