| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.506294658046954, | |
| "global_step": 100000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.5192, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4978, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 0.4652232229709625, | |
| "eval_runtime": 21.3928, | |
| "eval_samples_per_second": 23.372, | |
| "eval_steps_per_second": 0.748, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4877, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4811, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 0.4523410201072693, | |
| "eval_runtime": 15.3182, | |
| "eval_samples_per_second": 32.641, | |
| "eval_steps_per_second": 1.045, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4747, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4714, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 0.44367074966430664, | |
| "eval_runtime": 16.026, | |
| "eval_samples_per_second": 31.199, | |
| "eval_steps_per_second": 0.998, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4671, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4648, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 0.4375583827495575, | |
| "eval_runtime": 16.9713, | |
| "eval_samples_per_second": 29.461, | |
| "eval_steps_per_second": 0.943, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4628, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4611, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_loss": 0.4329264163970947, | |
| "eval_runtime": 20.3173, | |
| "eval_samples_per_second": 24.61, | |
| "eval_steps_per_second": 0.788, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4588, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4567, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 0.4276145100593567, | |
| "eval_runtime": 16.3756, | |
| "eval_samples_per_second": 30.533, | |
| "eval_steps_per_second": 0.977, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4549, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4527, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 0.42289844155311584, | |
| "eval_runtime": 15.9391, | |
| "eval_samples_per_second": 31.369, | |
| "eval_steps_per_second": 1.004, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4523, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4504, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 0.4213045537471771, | |
| "eval_runtime": 15.5457, | |
| "eval_samples_per_second": 32.163, | |
| "eval_steps_per_second": 1.029, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.449, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4473, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 0.41637736558914185, | |
| "eval_runtime": 15.7487, | |
| "eval_samples_per_second": 31.749, | |
| "eval_steps_per_second": 1.016, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4468, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4447, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 0.4148881733417511, | |
| "eval_runtime": 15.3622, | |
| "eval_samples_per_second": 32.547, | |
| "eval_steps_per_second": 1.042, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4437, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4433, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 0.4144207835197449, | |
| "eval_runtime": 30.4128, | |
| "eval_samples_per_second": 16.44, | |
| "eval_steps_per_second": 0.526, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4431, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4413, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 0.4120546877384186, | |
| "eval_runtime": 14.9708, | |
| "eval_samples_per_second": 33.398, | |
| "eval_steps_per_second": 1.069, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4401, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4395, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 0.40858784317970276, | |
| "eval_runtime": 16.4691, | |
| "eval_samples_per_second": 30.36, | |
| "eval_steps_per_second": 0.972, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4411, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4391, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_loss": 0.40859168767929077, | |
| "eval_runtime": 16.6715, | |
| "eval_samples_per_second": 29.991, | |
| "eval_steps_per_second": 0.96, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4385, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4371, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_loss": 0.4050961434841156, | |
| "eval_runtime": 14.6709, | |
| "eval_samples_per_second": 34.081, | |
| "eval_steps_per_second": 1.091, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4358, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4363, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 0.4048325717449188, | |
| "eval_runtime": 16.0756, | |
| "eval_samples_per_second": 31.103, | |
| "eval_steps_per_second": 0.995, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4352, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4346, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 0.4037468731403351, | |
| "eval_runtime": 16.4235, | |
| "eval_samples_per_second": 30.444, | |
| "eval_steps_per_second": 0.974, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4336, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4335, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 0.402103453874588, | |
| "eval_runtime": 28.6118, | |
| "eval_samples_per_second": 17.475, | |
| "eval_steps_per_second": 0.559, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4325, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4319, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_loss": 0.4030299186706543, | |
| "eval_runtime": 16.452, | |
| "eval_samples_per_second": 30.391, | |
| "eval_steps_per_second": 0.973, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4311, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4317, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 0.40188169479370117, | |
| "eval_runtime": 15.416, | |
| "eval_samples_per_second": 32.434, | |
| "eval_steps_per_second": 1.038, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4313, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4296, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 0.39878711104393005, | |
| "eval_runtime": 16.1844, | |
| "eval_samples_per_second": 30.894, | |
| "eval_steps_per_second": 0.989, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4288, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4278, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_loss": 0.3984658718109131, | |
| "eval_runtime": 17.0912, | |
| "eval_samples_per_second": 29.255, | |
| "eval_steps_per_second": 0.936, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4278, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4276, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_loss": 0.3981262743473053, | |
| "eval_runtime": 16.5906, | |
| "eval_samples_per_second": 30.138, | |
| "eval_steps_per_second": 0.964, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.428, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4264, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 0.39774054288864136, | |
| "eval_runtime": 24.4452, | |
| "eval_samples_per_second": 20.454, | |
| "eval_steps_per_second": 0.655, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.427, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4267, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_loss": 0.3962687849998474, | |
| "eval_runtime": 16.5048, | |
| "eval_samples_per_second": 30.294, | |
| "eval_steps_per_second": 0.969, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4271, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4252, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_loss": 0.3965121805667877, | |
| "eval_runtime": 16.1623, | |
| "eval_samples_per_second": 30.936, | |
| "eval_steps_per_second": 0.99, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4255, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.425, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_loss": 0.39477214217185974, | |
| "eval_runtime": 15.7512, | |
| "eval_samples_per_second": 31.744, | |
| "eval_steps_per_second": 1.016, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4248, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4248, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_loss": 0.395481139421463, | |
| "eval_runtime": 15.4129, | |
| "eval_samples_per_second": 32.44, | |
| "eval_steps_per_second": 1.038, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4246, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.424, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_loss": 0.3951389193534851, | |
| "eval_runtime": 15.7676, | |
| "eval_samples_per_second": 31.711, | |
| "eval_steps_per_second": 1.015, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4241, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4234, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_loss": 0.3956534266471863, | |
| "eval_runtime": 15.8104, | |
| "eval_samples_per_second": 31.625, | |
| "eval_steps_per_second": 1.012, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.422, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4227, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_loss": 0.3907557427883148, | |
| "eval_runtime": 16.5808, | |
| "eval_samples_per_second": 30.155, | |
| "eval_steps_per_second": 0.965, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4213, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.421, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 0.3934537172317505, | |
| "eval_runtime": 24.4217, | |
| "eval_samples_per_second": 20.474, | |
| "eval_steps_per_second": 0.655, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4207, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4206, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_loss": 0.3901897072792053, | |
| "eval_runtime": 16.8693, | |
| "eval_samples_per_second": 29.64, | |
| "eval_steps_per_second": 0.948, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4202, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4196, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_loss": 0.3905479609966278, | |
| "eval_runtime": 16.5144, | |
| "eval_samples_per_second": 30.277, | |
| "eval_steps_per_second": 0.969, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4191, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4205, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 0.390372633934021, | |
| "eval_runtime": 16.8904, | |
| "eval_samples_per_second": 29.603, | |
| "eval_steps_per_second": 0.947, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4195, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4194, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "eval_loss": 0.38856348395347595, | |
| "eval_runtime": 16.5028, | |
| "eval_samples_per_second": 30.298, | |
| "eval_steps_per_second": 0.97, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4193, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4208, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "eval_loss": 0.3889642059803009, | |
| "eval_runtime": 28.0106, | |
| "eval_samples_per_second": 17.85, | |
| "eval_steps_per_second": 0.571, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4189, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4187, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "eval_loss": 0.3886989653110504, | |
| "eval_runtime": 15.6007, | |
| "eval_samples_per_second": 32.05, | |
| "eval_steps_per_second": 1.026, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4181, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.417, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "eval_loss": 0.3878667950630188, | |
| "eval_runtime": 14.893, | |
| "eval_samples_per_second": 33.573, | |
| "eval_steps_per_second": 1.074, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4176, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4164, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_loss": 0.3855785131454468, | |
| "eval_runtime": 15.2409, | |
| "eval_samples_per_second": 32.806, | |
| "eval_steps_per_second": 1.05, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4167, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.417, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "eval_loss": 0.38663551211357117, | |
| "eval_runtime": 24.5074, | |
| "eval_samples_per_second": 20.402, | |
| "eval_steps_per_second": 0.653, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4168, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4159, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "eval_loss": 0.38440173864364624, | |
| "eval_runtime": 30.9795, | |
| "eval_samples_per_second": 16.14, | |
| "eval_steps_per_second": 0.516, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4151, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4155, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "eval_loss": 0.3864738941192627, | |
| "eval_runtime": 24.9969, | |
| "eval_samples_per_second": 20.002, | |
| "eval_steps_per_second": 0.64, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4157, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4158, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "eval_loss": 0.3862515091896057, | |
| "eval_runtime": 28.5688, | |
| "eval_samples_per_second": 17.502, | |
| "eval_steps_per_second": 0.56, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4147, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4134, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "eval_loss": 0.38480713963508606, | |
| "eval_runtime": 27.3513, | |
| "eval_samples_per_second": 18.281, | |
| "eval_steps_per_second": 0.585, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4147, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4145, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "eval_loss": 0.3854221701622009, | |
| "eval_runtime": 27.205, | |
| "eval_samples_per_second": 18.379, | |
| "eval_steps_per_second": 0.588, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4149, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4143, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.38265106081962585, | |
| "eval_runtime": 26.169, | |
| "eval_samples_per_second": 19.107, | |
| "eval_steps_per_second": 0.611, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4129, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4144, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "eval_loss": 0.382869690656662, | |
| "eval_runtime": 25.2103, | |
| "eval_samples_per_second": 19.833, | |
| "eval_steps_per_second": 0.635, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4131, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4147, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "eval_loss": 0.38291990756988525, | |
| "eval_runtime": 36.6033, | |
| "eval_samples_per_second": 13.66, | |
| "eval_steps_per_second": 0.437, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4125, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4143, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "eval_loss": 0.3828723728656769, | |
| "eval_runtime": 27.6434, | |
| "eval_samples_per_second": 18.088, | |
| "eval_steps_per_second": 0.579, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4127, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4131, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "eval_loss": 0.3833463191986084, | |
| "eval_runtime": 50.874, | |
| "eval_samples_per_second": 9.828, | |
| "eval_steps_per_second": 0.315, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4129, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4129, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "eval_loss": 0.38282835483551025, | |
| "eval_runtime": 18.4975, | |
| "eval_samples_per_second": 27.031, | |
| "eval_steps_per_second": 0.865, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.412, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4121, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "eval_loss": 0.3821110427379608, | |
| "eval_runtime": 19.4342, | |
| "eval_samples_per_second": 25.728, | |
| "eval_steps_per_second": 0.823, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4112, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4116, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "eval_loss": 0.3829655051231384, | |
| "eval_runtime": 21.2757, | |
| "eval_samples_per_second": 23.501, | |
| "eval_steps_per_second": 0.752, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4108, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4104, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "eval_loss": 0.3811788260936737, | |
| "eval_runtime": 16.6571, | |
| "eval_samples_per_second": 30.017, | |
| "eval_steps_per_second": 0.961, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.411, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4108, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "eval_loss": 0.38048413395881653, | |
| "eval_runtime": 27.2288, | |
| "eval_samples_per_second": 18.363, | |
| "eval_steps_per_second": 0.588, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4112, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4098, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "eval_loss": 0.3806820809841156, | |
| "eval_runtime": 28.3585, | |
| "eval_samples_per_second": 17.631, | |
| "eval_steps_per_second": 0.564, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4083, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4097, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "eval_loss": 0.38133466243743896, | |
| "eval_runtime": 31.8927, | |
| "eval_samples_per_second": 15.678, | |
| "eval_steps_per_second": 0.502, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.41, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4098, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "eval_loss": 0.380397766828537, | |
| "eval_runtime": 29.3164, | |
| "eval_samples_per_second": 17.055, | |
| "eval_steps_per_second": 0.546, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4094, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4092, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "eval_loss": 0.38139721751213074, | |
| "eval_runtime": 19.7764, | |
| "eval_samples_per_second": 25.283, | |
| "eval_steps_per_second": 0.809, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4108, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4094, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "eval_loss": 0.3796501159667969, | |
| "eval_runtime": 18.1293, | |
| "eval_samples_per_second": 27.58, | |
| "eval_steps_per_second": 0.883, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4092, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4091, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "eval_loss": 0.3790924549102783, | |
| "eval_runtime": 20.9048, | |
| "eval_samples_per_second": 23.918, | |
| "eval_steps_per_second": 0.765, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.408, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4102, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "eval_loss": 0.3805426061153412, | |
| "eval_runtime": 27.4404, | |
| "eval_samples_per_second": 18.221, | |
| "eval_steps_per_second": 0.583, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4086, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4087, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "eval_loss": 0.37830984592437744, | |
| "eval_runtime": 14.8851, | |
| "eval_samples_per_second": 33.591, | |
| "eval_steps_per_second": 1.075, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4081, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4083, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "eval_loss": 0.3796636164188385, | |
| "eval_runtime": 17.3567, | |
| "eval_samples_per_second": 28.807, | |
| "eval_steps_per_second": 0.922, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4078, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4078, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "eval_loss": 0.3783106803894043, | |
| "eval_runtime": 29.6676, | |
| "eval_samples_per_second": 16.853, | |
| "eval_steps_per_second": 0.539, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4067, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4072, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "eval_loss": 0.3780921399593353, | |
| "eval_runtime": 15.2739, | |
| "eval_samples_per_second": 32.736, | |
| "eval_steps_per_second": 1.048, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4079, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4057, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "eval_loss": 0.37833890318870544, | |
| "eval_runtime": 17.1263, | |
| "eval_samples_per_second": 29.195, | |
| "eval_steps_per_second": 0.934, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.406, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4065, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "eval_loss": 0.37815991044044495, | |
| "eval_runtime": 19.0772, | |
| "eval_samples_per_second": 26.209, | |
| "eval_steps_per_second": 0.839, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4063, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4062, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "eval_loss": 0.3770570158958435, | |
| "eval_runtime": 15.6266, | |
| "eval_samples_per_second": 31.997, | |
| "eval_steps_per_second": 1.024, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4073, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4051, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "eval_loss": 0.3775251507759094, | |
| "eval_runtime": 16.0318, | |
| "eval_samples_per_second": 31.188, | |
| "eval_steps_per_second": 0.998, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4066, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4057, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "eval_loss": 0.37701237201690674, | |
| "eval_runtime": 15.6982, | |
| "eval_samples_per_second": 31.851, | |
| "eval_steps_per_second": 1.019, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4067, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4061, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "eval_loss": 0.37806421518325806, | |
| "eval_runtime": 15.7852, | |
| "eval_samples_per_second": 31.675, | |
| "eval_steps_per_second": 1.014, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4053, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.405, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "eval_loss": 0.3771826922893524, | |
| "eval_runtime": 15.5158, | |
| "eval_samples_per_second": 32.225, | |
| "eval_steps_per_second": 1.031, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4064, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4053, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "eval_loss": 0.377290278673172, | |
| "eval_runtime": 23.3698, | |
| "eval_samples_per_second": 21.395, | |
| "eval_steps_per_second": 0.685, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.406, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4054, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "eval_loss": 0.3762701749801636, | |
| "eval_runtime": 15.2662, | |
| "eval_samples_per_second": 32.752, | |
| "eval_steps_per_second": 1.048, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4047, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4043, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "eval_loss": 0.3773665130138397, | |
| "eval_runtime": 23.0339, | |
| "eval_samples_per_second": 21.707, | |
| "eval_steps_per_second": 0.695, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4041, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4044, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "eval_loss": 0.3738757371902466, | |
| "eval_runtime": 16.5496, | |
| "eval_samples_per_second": 30.212, | |
| "eval_steps_per_second": 0.967, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4038, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4038, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "eval_loss": 0.37452879548072815, | |
| "eval_runtime": 16.7684, | |
| "eval_samples_per_second": 29.818, | |
| "eval_steps_per_second": 0.954, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4039, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4045, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "eval_loss": 0.3761942684650421, | |
| "eval_runtime": 16.6694, | |
| "eval_samples_per_second": 29.995, | |
| "eval_steps_per_second": 0.96, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4036, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4035, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "eval_loss": 0.3746860921382904, | |
| "eval_runtime": 15.7109, | |
| "eval_samples_per_second": 31.825, | |
| "eval_steps_per_second": 1.018, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4037, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4045, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "eval_loss": 0.37363681197166443, | |
| "eval_runtime": 22.9088, | |
| "eval_samples_per_second": 21.826, | |
| "eval_steps_per_second": 0.698, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4035, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4031, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "eval_loss": 0.37529370188713074, | |
| "eval_runtime": 14.7314, | |
| "eval_samples_per_second": 33.941, | |
| "eval_steps_per_second": 1.086, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.402, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4042, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "eval_loss": 0.37475818395614624, | |
| "eval_runtime": 15.8331, | |
| "eval_samples_per_second": 31.579, | |
| "eval_steps_per_second": 1.011, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4032, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4029, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "eval_loss": 0.3748987317085266, | |
| "eval_runtime": 17.2956, | |
| "eval_samples_per_second": 28.909, | |
| "eval_steps_per_second": 0.925, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4034, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4029, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "eval_loss": 0.37344664335250854, | |
| "eval_runtime": 15.9881, | |
| "eval_samples_per_second": 31.273, | |
| "eval_steps_per_second": 1.001, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4043, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4019, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "eval_loss": 0.3718353509902954, | |
| "eval_runtime": 15.2483, | |
| "eval_samples_per_second": 32.791, | |
| "eval_steps_per_second": 1.049, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.403, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4023, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "eval_loss": 0.37316328287124634, | |
| "eval_runtime": 26.8261, | |
| "eval_samples_per_second": 18.639, | |
| "eval_steps_per_second": 0.596, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.402, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4022, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "eval_loss": 0.3714210093021393, | |
| "eval_runtime": 15.634, | |
| "eval_samples_per_second": 31.982, | |
| "eval_steps_per_second": 1.023, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4017, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4019, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "eval_loss": 0.3728122413158417, | |
| "eval_runtime": 16.3123, | |
| "eval_samples_per_second": 30.652, | |
| "eval_steps_per_second": 0.981, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4017, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4016, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "eval_loss": 0.3734327256679535, | |
| "eval_runtime": 17.5409, | |
| "eval_samples_per_second": 28.505, | |
| "eval_steps_per_second": 0.912, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.3998, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4006, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "eval_loss": 0.3747243583202362, | |
| "eval_runtime": 17.4755, | |
| "eval_samples_per_second": 28.611, | |
| "eval_steps_per_second": 0.916, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4013, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4008, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "eval_loss": 0.37303251028060913, | |
| "eval_runtime": 17.0856, | |
| "eval_samples_per_second": 29.264, | |
| "eval_steps_per_second": 0.936, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4008, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.402, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.37281692028045654, | |
| "eval_runtime": 17.9894, | |
| "eval_samples_per_second": 27.794, | |
| "eval_steps_per_second": 0.889, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4005, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4008, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "eval_loss": 0.37092164158821106, | |
| "eval_runtime": 17.2285, | |
| "eval_samples_per_second": 29.022, | |
| "eval_steps_per_second": 0.929, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.3997, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4024, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "eval_loss": 0.37120264768600464, | |
| "eval_runtime": 16.2125, | |
| "eval_samples_per_second": 30.84, | |
| "eval_steps_per_second": 0.987, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.3997, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.402, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "eval_loss": 0.37261128425598145, | |
| "eval_runtime": 16.3463, | |
| "eval_samples_per_second": 30.588, | |
| "eval_steps_per_second": 0.979, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4004, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4003, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "eval_loss": 0.37027257680892944, | |
| "eval_runtime": 20.0807, | |
| "eval_samples_per_second": 24.9, | |
| "eval_steps_per_second": 0.797, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4002, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4007, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "eval_loss": 0.37140411138534546, | |
| "eval_runtime": 21.4309, | |
| "eval_samples_per_second": 23.331, | |
| "eval_steps_per_second": 0.747, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.3997, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "eval_loss": 0.3693406283855438, | |
| "eval_runtime": 17.2208, | |
| "eval_samples_per_second": 29.035, | |
| "eval_steps_per_second": 0.929, | |
| "step": 100000 | |
| } | |
| ], | |
| "max_steps": 1000000, | |
| "num_train_epochs": 86, | |
| "total_flos": 4.600200440697905e+21, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |