{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13,
      "learning_rate": 5.333333333333333e-05,
      "loss": 0.8383,
      "step": 2
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00010666666666666667,
      "loss": 0.8509,
      "step": 4
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00016,
      "loss": 0.8373,
      "step": 6
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00021333333333333333,
      "loss": 0.8067,
      "step": 8
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.0002666666666666667,
      "loss": 0.5774,
      "step": 10
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00032,
      "loss": 0.5267,
      "step": 12
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.0003733333333333334,
      "loss": 0.551,
      "step": 14
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.00039703703703703705,
      "loss": 0.4659,
      "step": 16
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.0003911111111111111,
      "loss": 0.4995,
      "step": 18
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.0003851851851851852,
      "loss": 0.5013,
      "step": 20
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.0003792592592592593,
      "loss": 0.3762,
      "step": 22
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0003733333333333334,
      "loss": 0.3964,
      "step": 24
    },
    {
      "epoch": 1.73,
      "learning_rate": 0.00036740740740740744,
      "loss": 0.4134,
      "step": 26
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.0003614814814814815,
      "loss": 0.3222,
      "step": 28
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.00035555555555555557,
      "loss": 0.2872,
      "step": 30
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.00034962962962962964,
      "loss": 0.3529,
      "step": 32
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.0003437037037037037,
      "loss": 0.282,
      "step": 34
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.00033777777777777777,
      "loss": 0.2544,
      "step": 36
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.00033185185185185184,
      "loss": 0.212,
      "step": 38
    },
    {
      "epoch": 2.67,
      "learning_rate": 0.0003259259259259259,
      "loss": 0.1252,
      "step": 40
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.00032,
      "loss": 0.1933,
      "step": 42
    },
    {
      "epoch": 2.93,
      "learning_rate": 0.0003140740740740741,
      "loss": 0.1614,
      "step": 44
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.00030814814814814816,
      "loss": 0.1694,
      "step": 46
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.0003022222222222222,
      "loss": 0.1645,
      "step": 48
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.0002962962962962963,
      "loss": 0.1532,
      "step": 50
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.0002903703703703704,
      "loss": 0.1115,
      "step": 52
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.0002844444444444445,
      "loss": 0.0873,
      "step": 54
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.00027851851851851855,
      "loss": 0.1409,
      "step": 56
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.0002725925925925926,
      "loss": 0.1363,
      "step": 58
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.0002666666666666667,
      "loss": 0.12,
      "step": 60
    },
    {
      "epoch": 4.13,
      "learning_rate": 0.0002607407407407408,
      "loss": 0.0858,
      "step": 62
    },
    {
      "epoch": 4.27,
      "learning_rate": 0.0002548148148148148,
      "loss": 0.1151,
      "step": 64
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.0002488888888888889,
      "loss": 0.0631,
      "step": 66
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.00024296296296296297,
      "loss": 0.1468,
      "step": 68
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.00023703703703703704,
      "loss": 0.1032,
      "step": 70
    },
    {
      "epoch": 4.8,
      "learning_rate": 0.0002311111111111111,
      "loss": 0.0626,
      "step": 72
    },
    {
      "epoch": 4.93,
      "learning_rate": 0.0002251851851851852,
      "loss": 0.0551,
      "step": 74
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.00021925925925925927,
      "loss": 0.0717,
      "step": 76
    },
    {
      "epoch": 5.2,
      "learning_rate": 0.00021333333333333333,
      "loss": 0.0711,
      "step": 78
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.0002074074074074074,
      "loss": 0.0717,
      "step": 80
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.00020148148148148147,
      "loss": 0.0886,
      "step": 82
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.00019555555555555556,
      "loss": 0.0527,
      "step": 84
    },
    {
      "epoch": 5.73,
      "learning_rate": 0.00018962962962962965,
      "loss": 0.0633,
      "step": 86
    },
    {
      "epoch": 5.87,
      "learning_rate": 0.00018370370370370372,
      "loss": 0.0424,
      "step": 88
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.00017777777777777779,
      "loss": 0.0431,
      "step": 90
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.00017185185185185185,
      "loss": 0.0538,
      "step": 92
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.00016592592592592592,
      "loss": 0.0526,
      "step": 94
    },
    {
      "epoch": 6.4,
      "learning_rate": 0.00016,
      "loss": 0.055,
      "step": 96
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.00015407407407407408,
      "loss": 0.0518,
      "step": 98
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.00014814814814814815,
      "loss": 0.0484,
      "step": 100
    },
    {
      "epoch": 6.8,
      "learning_rate": 0.00014222222222222224,
      "loss": 0.0526,
      "step": 102
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.0001362962962962963,
      "loss": 0.0544,
      "step": 104
    },
    {
      "epoch": 7.07,
      "learning_rate": 0.0001303703703703704,
      "loss": 0.036,
      "step": 106
    },
    {
      "epoch": 7.2,
      "learning_rate": 0.00012444444444444444,
      "loss": 0.0475,
      "step": 108
    },
    {
      "epoch": 7.33,
      "learning_rate": 0.00011851851851851852,
      "loss": 0.0466,
      "step": 110
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.0001125925925925926,
      "loss": 0.0416,
      "step": 112
    },
    {
      "epoch": 7.6,
      "learning_rate": 0.00010666666666666667,
      "loss": 0.0387,
      "step": 114
    },
    {
      "epoch": 7.73,
      "learning_rate": 0.00010074074074074073,
      "loss": 0.0494,
      "step": 116
    },
    {
      "epoch": 7.87,
      "learning_rate": 9.481481481481483e-05,
      "loss": 0.0451,
      "step": 118
    },
    {
      "epoch": 8.0,
      "learning_rate": 8.888888888888889e-05,
      "loss": 0.0348,
      "step": 120
    },
    {
      "epoch": 8.13,
      "learning_rate": 8.296296296296296e-05,
      "loss": 0.0487,
      "step": 122
    },
    {
      "epoch": 8.27,
      "learning_rate": 7.703703703703704e-05,
      "loss": 0.0331,
      "step": 124
    },
    {
      "epoch": 8.4,
      "learning_rate": 7.111111111111112e-05,
      "loss": 0.0261,
      "step": 126
    },
    {
      "epoch": 8.53,
      "learning_rate": 6.51851851851852e-05,
      "loss": 0.0353,
      "step": 128
    },
    {
      "epoch": 8.67,
      "learning_rate": 5.925925925925926e-05,
      "loss": 0.0548,
      "step": 130
    },
    {
      "epoch": 8.8,
      "learning_rate": 5.333333333333333e-05,
      "loss": 0.0324,
      "step": 132
    },
    {
      "epoch": 8.93,
      "learning_rate": 4.740740740740741e-05,
      "loss": 0.0362,
      "step": 134
    },
    {
      "epoch": 9.07,
      "learning_rate": 4.148148148148148e-05,
      "loss": 0.0258,
      "step": 136
    },
    {
      "epoch": 9.2,
      "learning_rate": 3.555555555555556e-05,
      "loss": 0.0328,
      "step": 138
    },
    {
      "epoch": 9.33,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.0325,
      "step": 140
    },
    {
      "epoch": 9.47,
      "learning_rate": 2.3703703703703707e-05,
      "loss": 0.0245,
      "step": 142
    },
    {
      "epoch": 9.6,
      "learning_rate": 1.777777777777778e-05,
      "loss": 0.0294,
      "step": 144
    },
    {
      "epoch": 9.73,
      "learning_rate": 1.1851851851851853e-05,
      "loss": 0.0439,
      "step": 146
    },
    {
      "epoch": 9.87,
      "learning_rate": 5.925925925925927e-06,
      "loss": 0.0412,
      "step": 148
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0,
      "loss": 0.0352,
      "step": 150
    }
  ],
  "max_steps": 150,
  "num_train_epochs": 10,
  "total_flos": 2.407321351225344e+16,
  "trial_name": null,
  "trial_params": null
}