{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 23.998851894374283,
  "global_step": 10440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 2.075101852416992,
      "eval_runtime": 45.9619,
      "eval_samples_per_second": 63.814,
      "eval_steps_per_second": 7.985,
      "step": 435
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.760536398467433e-05,
      "loss": 2.1982,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.046468496322632,
      "eval_runtime": 45.9297,
      "eval_samples_per_second": 63.859,
      "eval_steps_per_second": 7.99,
      "step": 870
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.5210727969348656e-05,
      "loss": 2.0841,
      "step": 1000
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.042027235031128,
      "eval_runtime": 45.9311,
      "eval_samples_per_second": 63.857,
      "eval_steps_per_second": 7.99,
      "step": 1305
    },
    {
      "epoch": 3.45,
      "learning_rate": 4.2816091954022994e-05,
      "loss": 2.0374,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_loss": 2.032456398010254,
      "eval_runtime": 45.8552,
      "eval_samples_per_second": 63.962,
      "eval_steps_per_second": 8.003,
      "step": 1740
    },
    {
      "epoch": 4.6,
      "learning_rate": 4.0421455938697324e-05,
      "loss": 1.9731,
      "step": 2000
    },
    {
      "epoch": 5.0,
      "eval_loss": 2.007476806640625,
      "eval_runtime": 45.8552,
      "eval_samples_per_second": 63.962,
      "eval_steps_per_second": 8.003,
      "step": 2175
    },
    {
      "epoch": 5.75,
      "learning_rate": 3.802681992337165e-05,
      "loss": 1.9248,
      "step": 2500
    },
    {
      "epoch": 6.0,
      "eval_loss": 2.021885871887207,
      "eval_runtime": 45.9257,
      "eval_samples_per_second": 63.864,
      "eval_steps_per_second": 7.991,
      "step": 2610
    },
    {
      "epoch": 6.9,
      "learning_rate": 3.563218390804598e-05,
      "loss": 1.8848,
      "step": 3000
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.9770045280456543,
      "eval_runtime": 45.8935,
      "eval_samples_per_second": 63.909,
      "eval_steps_per_second": 7.997,
      "step": 3045
    },
    {
      "epoch": 8.0,
      "eval_loss": 2.0093371868133545,
      "eval_runtime": 45.9038,
      "eval_samples_per_second": 63.895,
      "eval_steps_per_second": 7.995,
      "step": 3480
    },
    {
      "epoch": 8.05,
      "learning_rate": 3.323754789272031e-05,
      "loss": 1.8419,
      "step": 3500
    },
    {
      "epoch": 9.0,
      "eval_loss": 2.0297892093658447,
      "eval_runtime": 45.9137,
      "eval_samples_per_second": 63.881,
      "eval_steps_per_second": 7.993,
      "step": 3915
    },
    {
      "epoch": 9.2,
      "learning_rate": 3.084291187739464e-05,
      "loss": 1.804,
      "step": 4000
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.9681042432785034,
      "eval_runtime": 45.9244,
      "eval_samples_per_second": 63.866,
      "eval_steps_per_second": 7.991,
      "step": 4350
    },
    {
      "epoch": 10.34,
      "learning_rate": 2.844827586206897e-05,
      "loss": 1.7817,
      "step": 4500
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.9937865734100342,
      "eval_runtime": 45.8978,
      "eval_samples_per_second": 63.903,
      "eval_steps_per_second": 7.996,
      "step": 4785
    },
    {
      "epoch": 11.49,
      "learning_rate": 2.6053639846743293e-05,
      "loss": 1.7472,
      "step": 5000
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.9653546810150146,
      "eval_runtime": 45.9667,
      "eval_samples_per_second": 63.807,
      "eval_steps_per_second": 7.984,
      "step": 5220
    },
    {
      "epoch": 12.64,
      "learning_rate": 2.3659003831417627e-05,
      "loss": 1.7075,
      "step": 5500
    },
    {
      "epoch": 13.0,
      "eval_loss": 1.9797016382217407,
      "eval_runtime": 45.9403,
      "eval_samples_per_second": 63.844,
      "eval_steps_per_second": 7.989,
      "step": 5655
    },
    {
      "epoch": 13.79,
      "learning_rate": 2.1264367816091954e-05,
      "loss": 1.6976,
      "step": 6000
    },
    {
      "epoch": 14.0,
      "eval_loss": 1.9690951108932495,
      "eval_runtime": 45.9506,
      "eval_samples_per_second": 63.829,
      "eval_steps_per_second": 7.987,
      "step": 6090
    },
    {
      "epoch": 14.94,
      "learning_rate": 1.8869731800766285e-05,
      "loss": 1.6748,
      "step": 6500
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.9567583799362183,
      "eval_runtime": 46.1194,
      "eval_samples_per_second": 63.596,
      "eval_steps_per_second": 7.958,
      "step": 6525
    },
    {
      "epoch": 16.0,
      "eval_loss": 1.9617565870285034,
      "eval_runtime": 46.1884,
      "eval_samples_per_second": 63.501,
      "eval_steps_per_second": 7.946,
      "step": 6960
    },
    {
      "epoch": 16.09,
      "learning_rate": 1.6475095785440615e-05,
      "loss": 1.6528,
      "step": 7000
    },
    {
      "epoch": 17.0,
      "eval_loss": 1.9842615127563477,
      "eval_runtime": 45.9636,
      "eval_samples_per_second": 63.811,
      "eval_steps_per_second": 7.985,
      "step": 7395
    },
    {
      "epoch": 17.24,
      "learning_rate": 1.4080459770114942e-05,
      "loss": 1.6335,
      "step": 7500
    },
    {
      "epoch": 18.0,
      "eval_loss": 1.926523208618164,
      "eval_runtime": 45.9116,
      "eval_samples_per_second": 63.884,
      "eval_steps_per_second": 7.994,
      "step": 7830
    },
    {
      "epoch": 18.39,
      "learning_rate": 1.1685823754789272e-05,
      "loss": 1.6179,
      "step": 8000
    },
    {
      "epoch": 19.0,
      "eval_loss": 1.959792137145996,
      "eval_runtime": 45.9236,
      "eval_samples_per_second": 63.867,
      "eval_steps_per_second": 7.992,
      "step": 8265
    },
    {
      "epoch": 19.54,
      "learning_rate": 9.291187739463603e-06,
      "loss": 1.5992,
      "step": 8500
    },
    {
      "epoch": 20.0,
      "eval_loss": 1.933074951171875,
      "eval_runtime": 45.9682,
      "eval_samples_per_second": 63.805,
      "eval_steps_per_second": 7.984,
      "step": 8700
    },
    {
      "epoch": 20.69,
      "learning_rate": 6.896551724137932e-06,
      "loss": 1.583,
      "step": 9000
    },
    {
      "epoch": 21.0,
      "eval_loss": 1.9795454740524292,
      "eval_runtime": 45.9526,
      "eval_samples_per_second": 63.827,
      "eval_steps_per_second": 7.986,
      "step": 9135
    },
    {
      "epoch": 21.84,
      "learning_rate": 4.50191570881226e-06,
      "loss": 1.5699,
      "step": 9500
    },
    {
      "epoch": 22.0,
      "eval_loss": 2.007305860519409,
      "eval_runtime": 45.9532,
      "eval_samples_per_second": 63.826,
      "eval_steps_per_second": 7.986,
      "step": 9570
    },
    {
      "epoch": 22.99,
      "learning_rate": 2.1072796934865904e-06,
      "loss": 1.5703,
      "step": 10000
    },
    {
      "epoch": 23.0,
      "eval_loss": 1.9308319091796875,
      "eval_runtime": 45.9891,
      "eval_samples_per_second": 63.776,
      "eval_steps_per_second": 7.98,
      "step": 10005
    },
    {
      "epoch": 24.0,
      "eval_loss": 1.9284570217132568,
      "eval_runtime": 45.932,
      "eval_samples_per_second": 63.855,
      "eval_steps_per_second": 7.99,
      "step": 10440
    },
    {
      "epoch": 24.0,
      "step": 10440,
      "total_flos": 3.52063018239615e+17,
      "train_loss": 1.7702036539713542,
      "train_runtime": 78929.442,
      "train_samples_per_second": 16.948,
      "train_steps_per_second": 0.132
    }
  ],
  "max_steps": 10440,
  "num_train_epochs": 24,
  "total_flos": 3.52063018239615e+17,
  "trial_name": null,
  "trial_params": null
}