| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 35.026963262554766, | |
| "global_step": 12960, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8e-05, | |
| "loss": 1.9241, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00016, | |
| "loss": 1.8026, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 1.7006735801696777, | |
| "eval_runtime": 120.5725, | |
| "eval_samples_per_second": 46.437, | |
| "eval_steps_per_second": 0.73, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0002, | |
| "loss": 1.7588, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0002, | |
| "loss": 1.7242, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 1.6368365287780762, | |
| "eval_runtime": 123.5326, | |
| "eval_samples_per_second": 45.324, | |
| "eval_steps_per_second": 0.712, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0002, | |
| "loss": 1.6797, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.0002, | |
| "loss": 1.6544, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_loss": 1.589858889579773, | |
| "eval_runtime": 121.9204, | |
| "eval_samples_per_second": 45.923, | |
| "eval_steps_per_second": 0.722, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 0.0002, | |
| "loss": 1.639, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 0.0002, | |
| "loss": 1.6103, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_loss": 1.559193730354309, | |
| "eval_runtime": 118.3836, | |
| "eval_samples_per_second": 47.295, | |
| "eval_steps_per_second": 0.743, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5982, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5858, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "eval_loss": 1.5362491607666016, | |
| "eval_runtime": 123.4422, | |
| "eval_samples_per_second": 45.357, | |
| "eval_steps_per_second": 0.713, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5684, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5566, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "eval_loss": 1.51528799533844, | |
| "eval_runtime": 120.2858, | |
| "eval_samples_per_second": 46.547, | |
| "eval_steps_per_second": 0.732, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5593, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5322, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "eval_loss": 1.5114836692810059, | |
| "eval_runtime": 117.8482, | |
| "eval_samples_per_second": 47.51, | |
| "eval_steps_per_second": 0.747, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5285, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5359, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "eval_loss": 1.48625910282135, | |
| "eval_runtime": 123.7493, | |
| "eval_samples_per_second": 45.245, | |
| "eval_steps_per_second": 0.711, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5207, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 0.0002, | |
| "loss": 1.5079, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "eval_loss": 1.4822603464126587, | |
| "eval_runtime": 119.2091, | |
| "eval_samples_per_second": 46.968, | |
| "eval_steps_per_second": 0.738, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 0.0002, | |
| "loss": 1.51, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4909, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "eval_loss": 1.4646539688110352, | |
| "eval_runtime": 122.8427, | |
| "eval_samples_per_second": 45.579, | |
| "eval_steps_per_second": 0.716, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4869, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4894, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "eval_loss": 1.4567737579345703, | |
| "eval_runtime": 112.4698, | |
| "eval_samples_per_second": 49.782, | |
| "eval_steps_per_second": 0.782, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4705, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 0.0002, | |
| "loss": 1.469, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "eval_loss": 1.447322130203247, | |
| "eval_runtime": 124.434, | |
| "eval_samples_per_second": 44.996, | |
| "eval_steps_per_second": 0.707, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4716, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4525, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "eval_loss": 1.4480490684509277, | |
| "eval_runtime": 120.9825, | |
| "eval_samples_per_second": 46.279, | |
| "eval_steps_per_second": 0.727, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 0.0002, | |
| "loss": 1.452, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4552, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "eval_loss": 1.4297771453857422, | |
| "eval_runtime": 119.4349, | |
| "eval_samples_per_second": 46.879, | |
| "eval_steps_per_second": 0.737, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4369, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4357, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "eval_loss": 1.4253787994384766, | |
| "eval_runtime": 123.7286, | |
| "eval_samples_per_second": 45.252, | |
| "eval_steps_per_second": 0.711, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4449, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4245, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "eval_loss": 1.419893741607666, | |
| "eval_runtime": 122.5962, | |
| "eval_samples_per_second": 45.67, | |
| "eval_steps_per_second": 0.718, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4259, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4317, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "eval_loss": 1.4151264429092407, | |
| "eval_runtime": 120.6018, | |
| "eval_samples_per_second": 46.426, | |
| "eval_steps_per_second": 0.73, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4133, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4119, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "eval_loss": 1.4069455862045288, | |
| "eval_runtime": 123.9031, | |
| "eval_samples_per_second": 45.189, | |
| "eval_steps_per_second": 0.71, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4096, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4086, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "eval_loss": 1.4099173545837402, | |
| "eval_runtime": 121.1011, | |
| "eval_samples_per_second": 46.234, | |
| "eval_steps_per_second": 0.727, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 12.65, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4031, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 12.97, | |
| "learning_rate": 0.0002, | |
| "loss": 1.401, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 12.97, | |
| "eval_loss": 1.4046831130981445, | |
| "eval_runtime": 121.8177, | |
| "eval_samples_per_second": 45.962, | |
| "eval_steps_per_second": 0.722, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 13.3, | |
| "learning_rate": 0.0002, | |
| "loss": 1.4031, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 13.62, | |
| "learning_rate": 0.0002, | |
| "loss": 1.394, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 13.62, | |
| "eval_loss": 1.401537299156189, | |
| "eval_runtime": 121.4356, | |
| "eval_samples_per_second": 46.107, | |
| "eval_steps_per_second": 0.725, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3922, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 14.27, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3945, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 14.27, | |
| "eval_loss": 1.3918230533599854, | |
| "eval_runtime": 119.2233, | |
| "eval_samples_per_second": 46.962, | |
| "eval_steps_per_second": 0.738, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 14.59, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3836, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 14.92, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3838, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 14.92, | |
| "eval_loss": 1.385350227355957, | |
| "eval_runtime": 113.4489, | |
| "eval_samples_per_second": 49.353, | |
| "eval_steps_per_second": 0.776, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 15.24, | |
| "learning_rate": 0.0002, | |
| "loss": 1.387, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 15.57, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3722, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 15.57, | |
| "eval_loss": 1.379088282585144, | |
| "eval_runtime": 116.4932, | |
| "eval_samples_per_second": 48.063, | |
| "eval_steps_per_second": 0.755, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3757, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 16.22, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3775, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 16.22, | |
| "eval_loss": 1.384007453918457, | |
| "eval_runtime": 115.8099, | |
| "eval_samples_per_second": 48.346, | |
| "eval_steps_per_second": 0.76, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 16.54, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3683, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 16.86, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3675, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 16.86, | |
| "eval_loss": 1.3760778903961182, | |
| "eval_runtime": 113.2638, | |
| "eval_samples_per_second": 49.433, | |
| "eval_steps_per_second": 0.777, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 17.19, | |
| "learning_rate": 0.0002, | |
| "loss": 1.375, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 17.51, | |
| "learning_rate": 0.0002, | |
| "loss": 1.358, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 17.51, | |
| "eval_loss": 1.3729970455169678, | |
| "eval_runtime": 119.1962, | |
| "eval_samples_per_second": 46.973, | |
| "eval_steps_per_second": 0.738, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 17.84, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3617, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 18.16, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3679, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 18.16, | |
| "eval_loss": 1.3826600313186646, | |
| "eval_runtime": 118.9849, | |
| "eval_samples_per_second": 47.056, | |
| "eval_steps_per_second": 0.74, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 18.49, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3592, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 18.81, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3602, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 18.81, | |
| "eval_loss": 1.3659363985061646, | |
| "eval_runtime": 120.7081, | |
| "eval_samples_per_second": 46.385, | |
| "eval_steps_per_second": 0.729, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 19.13, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3633, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 19.46, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3522, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 19.46, | |
| "eval_loss": 1.372406244277954, | |
| "eval_runtime": 113.6178, | |
| "eval_samples_per_second": 49.279, | |
| "eval_steps_per_second": 0.775, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 19.78, | |
| "learning_rate": 0.0002, | |
| "loss": 1.345, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 20.11, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3555, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 20.11, | |
| "eval_loss": 1.368371844291687, | |
| "eval_runtime": 118.9369, | |
| "eval_samples_per_second": 47.075, | |
| "eval_steps_per_second": 0.74, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 20.43, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3396, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 20.75, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3536, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 20.75, | |
| "eval_loss": 1.3611598014831543, | |
| "eval_runtime": 119.3386, | |
| "eval_samples_per_second": 46.917, | |
| "eval_steps_per_second": 0.737, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 21.08, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3506, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 21.4, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3347, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 21.4, | |
| "eval_loss": 1.3598804473876953, | |
| "eval_runtime": 114.0961, | |
| "eval_samples_per_second": 49.073, | |
| "eval_steps_per_second": 0.771, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 21.73, | |
| "learning_rate": 0.0002, | |
| "loss": 1.338, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 22.05, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3463, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 22.05, | |
| "eval_loss": 1.3614617586135864, | |
| "eval_runtime": 121.7757, | |
| "eval_samples_per_second": 45.978, | |
| "eval_steps_per_second": 0.723, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 22.38, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3305, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 22.7, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3296, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 22.7, | |
| "eval_loss": 1.359055519104004, | |
| "eval_runtime": 113.3148, | |
| "eval_samples_per_second": 49.411, | |
| "eval_steps_per_second": 0.777, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 23.03, | |
| "learning_rate": 0.0002, | |
| "loss": 1.344, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 23.35, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3201, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 23.35, | |
| "eval_loss": 1.358960509300232, | |
| "eval_runtime": 122.2886, | |
| "eval_samples_per_second": 45.785, | |
| "eval_steps_per_second": 0.72, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 23.67, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3302, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3292, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 1.3509206771850586, | |
| "eval_runtime": 99.6058, | |
| "eval_samples_per_second": 56.212, | |
| "eval_steps_per_second": 0.883, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 24.32, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3294, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 24.65, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3207, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 24.65, | |
| "eval_loss": 1.357851505279541, | |
| "eval_runtime": 105.9073, | |
| "eval_samples_per_second": 52.867, | |
| "eval_steps_per_second": 0.831, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 24.97, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3215, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 25.3, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3231, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 25.3, | |
| "eval_loss": 1.3393853902816772, | |
| "eval_runtime": 99.7219, | |
| "eval_samples_per_second": 56.146, | |
| "eval_steps_per_second": 0.882, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 25.62, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3121, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 25.94, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3176, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 25.94, | |
| "eval_loss": 1.3441215753555298, | |
| "eval_runtime": 101.3937, | |
| "eval_samples_per_second": 55.22, | |
| "eval_steps_per_second": 0.868, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 26.27, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3188, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 26.59, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3103, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 26.59, | |
| "eval_loss": 1.3429008722305298, | |
| "eval_runtime": 100.8116, | |
| "eval_samples_per_second": 55.539, | |
| "eval_steps_per_second": 0.873, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 26.92, | |
| "learning_rate": 0.0002, | |
| "loss": 1.313, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 27.24, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3156, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 27.24, | |
| "eval_loss": 1.3400343656539917, | |
| "eval_runtime": 98.2948, | |
| "eval_samples_per_second": 56.961, | |
| "eval_steps_per_second": 0.895, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 27.57, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3064, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 27.89, | |
| "learning_rate": 0.0002, | |
| "loss": 1.306, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 27.89, | |
| "eval_loss": 1.339460015296936, | |
| "eval_runtime": 97.8707, | |
| "eval_samples_per_second": 57.208, | |
| "eval_steps_per_second": 0.899, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 28.22, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3093, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 28.54, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3026, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 28.54, | |
| "eval_loss": 1.3380861282348633, | |
| "eval_runtime": 99.7827, | |
| "eval_samples_per_second": 56.112, | |
| "eval_steps_per_second": 0.882, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 28.86, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3014, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 29.19, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3093, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 29.19, | |
| "eval_loss": 1.335351824760437, | |
| "eval_runtime": 99.7514, | |
| "eval_samples_per_second": 56.13, | |
| "eval_steps_per_second": 0.882, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 29.51, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2954, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 29.84, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2982, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 29.84, | |
| "eval_loss": 1.33037269115448, | |
| "eval_runtime": 111.392, | |
| "eval_samples_per_second": 50.264, | |
| "eval_steps_per_second": 0.79, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 30.16, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3032, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 30.49, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2927, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 30.49, | |
| "eval_loss": 1.3423055410385132, | |
| "eval_runtime": 110.815, | |
| "eval_samples_per_second": 50.526, | |
| "eval_steps_per_second": 0.794, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 30.81, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2968, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 31.13, | |
| "learning_rate": 0.0002, | |
| "loss": 1.3003, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 31.13, | |
| "eval_loss": 1.3345474004745483, | |
| "eval_runtime": 100.6956, | |
| "eval_samples_per_second": 55.603, | |
| "eval_steps_per_second": 0.874, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 31.46, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2865, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 31.78, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2928, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 31.78, | |
| "eval_loss": 1.337437629699707, | |
| "eval_runtime": 97.2235, | |
| "eval_samples_per_second": 57.589, | |
| "eval_steps_per_second": 0.905, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 32.11, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2981, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 32.43, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2847, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 32.43, | |
| "eval_loss": 1.3236644268035889, | |
| "eval_runtime": 97.4026, | |
| "eval_samples_per_second": 57.483, | |
| "eval_steps_per_second": 0.903, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 32.75, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2871, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 33.08, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2966, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 33.08, | |
| "eval_loss": 1.332656741142273, | |
| "eval_runtime": 97.3643, | |
| "eval_samples_per_second": 57.506, | |
| "eval_steps_per_second": 0.904, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 33.4, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2789, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 33.73, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2829, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 33.73, | |
| "eval_loss": 1.3252918720245361, | |
| "eval_runtime": 104.7279, | |
| "eval_samples_per_second": 53.462, | |
| "eval_steps_per_second": 0.84, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 34.05, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2926, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 34.38, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2756, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 34.38, | |
| "eval_loss": 1.326663613319397, | |
| "eval_runtime": 98.2526, | |
| "eval_samples_per_second": 56.986, | |
| "eval_steps_per_second": 0.896, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 34.7, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2801, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 35.03, | |
| "learning_rate": 0.0002, | |
| "loss": 1.2919, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 35.03, | |
| "eval_loss": 1.3183717727661133, | |
| "eval_runtime": 99.1376, | |
| "eval_samples_per_second": 56.477, | |
| "eval_steps_per_second": 0.888, | |
| "step": 12960 | |
| } | |
| ], | |
| "max_steps": 14000, | |
| "num_train_epochs": 38, | |
| "total_flos": 1.7505797492048026e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |