| { |
| "best_metric": 1.3470327854156494, |
| "best_model_checkpoint": "outputs/checkpoint-400", |
| "epoch": 0.13689253935660506, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 1e-05, |
| "loss": 1.8606, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2e-05, |
| "loss": 1.8806, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3e-05, |
| "loss": 1.8434, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4e-05, |
| "loss": 1.7431, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5e-05, |
| "loss": 1.7721, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6e-05, |
| "loss": 1.6826, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7e-05, |
| "loss": 1.5892, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8e-05, |
| "loss": 1.6333, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9e-05, |
| "loss": 1.5955, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001, |
| "loss": 1.5504, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00011000000000000002, |
| "loss": 1.5621, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00012, |
| "loss": 1.5142, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 1.4386, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00014, |
| "loss": 1.5017, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 1.4666, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00016, |
| "loss": 1.4295, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00017, |
| "loss": 1.4402, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00018, |
| "loss": 1.4795, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019, |
| "loss": 1.4138, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0002, |
| "loss": 1.4493, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0001998259052924791, |
| "loss": 1.4918, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019965181058495822, |
| "loss": 1.4659, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019947771587743734, |
| "loss": 1.4573, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019930362116991646, |
| "loss": 1.4274, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019912952646239555, |
| "loss": 1.4064, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019895543175487465, |
| "loss": 1.4048, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019878133704735376, |
| "loss": 1.4493, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019860724233983288, |
| "loss": 1.3317, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019843314763231198, |
| "loss": 1.4041, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.0001982590529247911, |
| "loss": 1.4311, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.0001980849582172702, |
| "loss": 1.3626, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019791086350974934, |
| "loss": 1.4296, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019773676880222843, |
| "loss": 1.3888, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019756267409470752, |
| "loss": 1.3389, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019738857938718664, |
| "loss": 1.4544, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019721448467966573, |
| "loss": 1.4382, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019704038997214485, |
| "loss": 1.4953, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019686629526462397, |
| "loss": 1.4026, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019669220055710306, |
| "loss": 1.3757, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019651810584958218, |
| "loss": 1.3247, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_loss": 1.3710952997207642, |
| "eval_runtime": 738.1164, |
| "eval_samples_per_second": 7.038, |
| "eval_steps_per_second": 0.881, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001963440111420613, |
| "loss": 1.4409, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001961699164345404, |
| "loss": 1.4023, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019599582172701951, |
| "loss": 1.3736, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.0001958217270194986, |
| "loss": 1.4321, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019564763231197773, |
| "loss": 1.4344, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019547353760445685, |
| "loss": 1.3539, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019529944289693594, |
| "loss": 1.438, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019512534818941506, |
| "loss": 1.457, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019495125348189415, |
| "loss": 1.429, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019477715877437327, |
| "loss": 1.4465, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0001946030640668524, |
| "loss": 1.4202, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019442896935933148, |
| "loss": 1.343, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0001942548746518106, |
| "loss": 1.4127, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0001940807799442897, |
| "loss": 1.3171, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0001939066852367688, |
| "loss": 1.4012, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019373259052924793, |
| "loss": 1.2933, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019355849582172702, |
| "loss": 1.326, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019338440111420614, |
| "loss": 1.3951, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019321030640668524, |
| "loss": 1.3149, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019303621169916436, |
| "loss": 1.4046, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019286211699164348, |
| "loss": 1.343, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019268802228412257, |
| "loss": 1.3676, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019251392757660166, |
| "loss": 1.3593, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0001923398328690808, |
| "loss": 1.3869, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0001921657381615599, |
| "loss": 1.3841, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019199164345403902, |
| "loss": 1.3806, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0001918175487465181, |
| "loss": 1.4527, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001916434540389972, |
| "loss": 1.4161, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019146935933147635, |
| "loss": 1.3019, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019129526462395544, |
| "loss": 1.4082, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019112116991643454, |
| "loss": 1.4261, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019094707520891365, |
| "loss": 1.4152, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019077298050139277, |
| "loss": 1.4289, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0001905988857938719, |
| "loss": 1.3951, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019042479108635099, |
| "loss": 1.3578, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019025069637883008, |
| "loss": 1.3342, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0001900766016713092, |
| "loss": 1.3959, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00018990250696378832, |
| "loss": 1.4336, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00018972841225626744, |
| "loss": 1.464, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00018955431754874653, |
| "loss": 1.4053, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_loss": 1.3470327854156494, |
| "eval_runtime": 738.9389, |
| "eval_samples_per_second": 7.03, |
| "eval_steps_per_second": 0.88, |
| "step": 400 |
| } |
| ], |
| "max_steps": 5844, |
| "num_train_epochs": 2, |
| "total_flos": 2.38239419990016e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|