| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.07289804249572203, | |
| "global_step": 9500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00099, | |
| "loss": 3.0632, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0009901010101010103, | |
| "loss": 2.9421, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00098, | |
| "loss": 2.9549, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00096989898989899, | |
| "loss": 2.9156, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0009597979797979798, | |
| "loss": 2.9743, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0009496969696969697, | |
| "loss": 2.9801, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0009395959595959597, | |
| "loss": 2.9492, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0009294949494949495, | |
| "loss": 3.043, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0009193939393939395, | |
| "loss": 2.9186, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0009092929292929293, | |
| "loss": 2.8856, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0008992929292929293, | |
| "loss": 2.947, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0008891919191919192, | |
| "loss": 2.9986, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0008790909090909091, | |
| "loss": 2.9525, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.000868989898989899, | |
| "loss": 2.9203, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.000858888888888889, | |
| "loss": 2.9795, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0008487878787878788, | |
| "loss": 2.9549, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0008386868686868687, | |
| "loss": 2.9033, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0008285858585858585, | |
| "loss": 2.9117, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0008185858585858587, | |
| "loss": 2.9674, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0008085858585858587, | |
| "loss": 2.9492, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007984848484848485, | |
| "loss": 2.9526, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007883838383838384, | |
| "loss": 2.9687, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007782828282828282, | |
| "loss": 2.9783, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007681818181818182, | |
| "loss": 2.9889, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007581818181818182, | |
| "loss": 2.9141, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007480808080808081, | |
| "loss": 2.9131, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000737979797979798, | |
| "loss": 2.9087, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007278787878787879, | |
| "loss": 2.9485, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007177777777777778, | |
| "loss": 2.8469, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0007076767676767677, | |
| "loss": 2.8715, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0006975757575757576, | |
| "loss": 2.9388, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0006874747474747474, | |
| "loss": 2.9958, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0006773737373737374, | |
| "loss": 2.9552, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0006672727272727273, | |
| "loss": 2.9126, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0006571717171717172, | |
| "loss": 3.0015, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000647070707070707, | |
| "loss": 2.9406, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0006369696969696969, | |
| "loss": 2.8848, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000626868686868687, | |
| "loss": 2.9598, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0006167676767676768, | |
| "loss": 2.8795, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0006066666666666667, | |
| "loss": 2.9366, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0005965656565656565, | |
| "loss": 2.9357, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0005864646464646464, | |
| "loss": 2.8924, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0005763636363636365, | |
| "loss": 2.8939, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0005662626262626263, | |
| "loss": 2.8453, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0005561616161616162, | |
| "loss": 2.8551, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.000546060606060606, | |
| "loss": 2.9476, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0005359595959595959, | |
| "loss": 2.9256, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.000525858585858586, | |
| "loss": 2.963, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0005157575757575758, | |
| "loss": 2.8954, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0005056565656565657, | |
| "loss": 2.8978, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004955555555555556, | |
| "loss": 2.8789, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004854545454545455, | |
| "loss": 2.8757, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00047545454545454545, | |
| "loss": 2.9107, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004653535353535354, | |
| "loss": 2.8769, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00045525252525252524, | |
| "loss": 2.8659, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00044515151515151516, | |
| "loss": 2.8624, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004350505050505051, | |
| "loss": 2.8672, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00042494949494949495, | |
| "loss": 2.8286, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00041484848484848487, | |
| "loss": 2.9685, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00040474747474747474, | |
| "loss": 2.9659, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00039464646464646466, | |
| "loss": 2.8937, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0003845454545454545, | |
| "loss": 2.8521, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0003744444444444445, | |
| "loss": 2.8721, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00036434343434343437, | |
| "loss": 2.8664, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00035424242424242423, | |
| "loss": 2.8925, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00034414141414141415, | |
| "loss": 2.8477, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.000334040404040404, | |
| "loss": 2.8804, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00032393939393939394, | |
| "loss": 2.8661, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00031383838383838386, | |
| "loss": 2.8934, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0003037373737373738, | |
| "loss": 2.9107, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029363636363636365, | |
| "loss": 2.8775, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002835353535353535, | |
| "loss": 2.8849, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00027343434343434343, | |
| "loss": 2.845, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002633333333333333, | |
| "loss": 2.8441, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002532323232323233, | |
| "loss": 2.8455, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00024313131313131314, | |
| "loss": 2.8027, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00023303030303030304, | |
| "loss": 2.8793, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00022292929292929293, | |
| "loss": 2.8763, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00021282828282828285, | |
| "loss": 2.8456, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00020272727272727274, | |
| "loss": 2.8217, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001926262626262626, | |
| "loss": 2.8026, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00018252525252525253, | |
| "loss": 2.8486, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00017242424242424242, | |
| "loss": 2.8023, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00016232323232323232, | |
| "loss": 2.8711, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00015222222222222224, | |
| "loss": 2.9234, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00014212121212121213, | |
| "loss": 2.8615, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00013202020202020203, | |
| "loss": 2.8765, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00012191919191919192, | |
| "loss": 2.891, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00011181818181818181, | |
| "loss": 2.8735, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00010171717171717172, | |
| "loss": 2.7965, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.161616161616161e-05, | |
| "loss": 2.7891, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.151515151515152e-05, | |
| "loss": 2.8796, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 7.141414141414141e-05, | |
| "loss": 2.8947, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 6.141414141414142e-05, | |
| "loss": 2.8773, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 5.131313131313131e-05, | |
| "loss": 2.8241, | |
| "step": 9500 | |
| } | |
| ], | |
| "max_steps": 10000, | |
| "num_train_epochs": 1, | |
| "total_flos": 3247836551823360.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |