| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 400.0, | |
| "global_step": 15200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 5.26, | |
| "eval_loss": 1.7080078125, | |
| "eval_runtime": 3.7763, | |
| "eval_samples_per_second": 68.321, | |
| "eval_steps_per_second": 8.739, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 10.53, | |
| "eval_loss": 1.7001953125, | |
| "eval_runtime": 3.7858, | |
| "eval_samples_per_second": 68.149, | |
| "eval_steps_per_second": 8.717, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 13.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.671, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 15.79, | |
| "eval_loss": 1.7138671875, | |
| "eval_runtime": 3.7865, | |
| "eval_samples_per_second": 68.137, | |
| "eval_steps_per_second": 8.715, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 21.05, | |
| "eval_loss": 1.744140625, | |
| "eval_runtime": 3.7879, | |
| "eval_samples_per_second": 68.112, | |
| "eval_steps_per_second": 8.712, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 26.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4438, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 26.32, | |
| "eval_loss": 1.794921875, | |
| "eval_runtime": 3.7869, | |
| "eval_samples_per_second": 68.13, | |
| "eval_steps_per_second": 8.714, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 31.58, | |
| "eval_loss": 1.84375, | |
| "eval_runtime": 3.7879, | |
| "eval_samples_per_second": 68.112, | |
| "eval_steps_per_second": 8.712, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 36.84, | |
| "eval_loss": 1.896484375, | |
| "eval_runtime": 3.7905, | |
| "eval_samples_per_second": 68.066, | |
| "eval_steps_per_second": 8.706, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 39.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2806, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 42.11, | |
| "eval_loss": 1.9619140625, | |
| "eval_runtime": 3.7916, | |
| "eval_samples_per_second": 68.044, | |
| "eval_steps_per_second": 8.703, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 47.37, | |
| "eval_loss": 2.01953125, | |
| "eval_runtime": 3.7897, | |
| "eval_samples_per_second": 68.08, | |
| "eval_steps_per_second": 8.708, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 52.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1433, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 52.63, | |
| "eval_loss": 2.068359375, | |
| "eval_runtime": 3.7891, | |
| "eval_samples_per_second": 68.091, | |
| "eval_steps_per_second": 8.709, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 57.89, | |
| "eval_loss": 2.1171875, | |
| "eval_runtime": 3.7902, | |
| "eval_samples_per_second": 68.069, | |
| "eval_steps_per_second": 8.707, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 63.16, | |
| "eval_loss": 2.1953125, | |
| "eval_runtime": 3.7898, | |
| "eval_samples_per_second": 68.077, | |
| "eval_steps_per_second": 8.708, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 65.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.027, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 68.42, | |
| "eval_loss": 2.25, | |
| "eval_runtime": 3.7881, | |
| "eval_samples_per_second": 68.108, | |
| "eval_steps_per_second": 8.711, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 73.68, | |
| "eval_loss": 2.291015625, | |
| "eval_runtime": 3.7876, | |
| "eval_samples_per_second": 68.118, | |
| "eval_steps_per_second": 8.713, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 78.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9216, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 78.95, | |
| "eval_loss": 2.34765625, | |
| "eval_runtime": 3.7885, | |
| "eval_samples_per_second": 68.101, | |
| "eval_steps_per_second": 8.711, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 84.21, | |
| "eval_loss": 2.423828125, | |
| "eval_runtime": 3.7907, | |
| "eval_samples_per_second": 68.062, | |
| "eval_steps_per_second": 8.706, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 89.47, | |
| "eval_loss": 2.482421875, | |
| "eval_runtime": 3.7903, | |
| "eval_samples_per_second": 68.068, | |
| "eval_steps_per_second": 8.706, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 92.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8209, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 94.74, | |
| "eval_loss": 2.529296875, | |
| "eval_runtime": 3.7863, | |
| "eval_samples_per_second": 68.14, | |
| "eval_steps_per_second": 8.716, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 2.5859375, | |
| "eval_runtime": 3.785, | |
| "eval_samples_per_second": 68.164, | |
| "eval_steps_per_second": 8.719, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 105.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.7231, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 105.26, | |
| "eval_loss": 2.6640625, | |
| "eval_runtime": 3.7856, | |
| "eval_samples_per_second": 68.153, | |
| "eval_steps_per_second": 8.717, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 110.53, | |
| "eval_loss": 2.703125, | |
| "eval_runtime": 3.7862, | |
| "eval_samples_per_second": 68.142, | |
| "eval_steps_per_second": 8.716, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 115.79, | |
| "eval_loss": 2.78515625, | |
| "eval_runtime": 3.7894, | |
| "eval_samples_per_second": 68.084, | |
| "eval_steps_per_second": 8.708, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 118.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6281, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 121.05, | |
| "eval_loss": 2.84375, | |
| "eval_runtime": 3.7883, | |
| "eval_samples_per_second": 68.105, | |
| "eval_steps_per_second": 8.711, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 126.32, | |
| "eval_loss": 2.921875, | |
| "eval_runtime": 3.79, | |
| "eval_samples_per_second": 68.074, | |
| "eval_steps_per_second": 8.707, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 131.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5384, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 131.58, | |
| "eval_loss": 2.994140625, | |
| "eval_runtime": 3.7895, | |
| "eval_samples_per_second": 68.082, | |
| "eval_steps_per_second": 8.708, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 136.84, | |
| "eval_loss": 3.048828125, | |
| "eval_runtime": 3.7912, | |
| "eval_samples_per_second": 68.053, | |
| "eval_steps_per_second": 8.704, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 142.11, | |
| "eval_loss": 3.107421875, | |
| "eval_runtime": 3.7872, | |
| "eval_samples_per_second": 68.123, | |
| "eval_steps_per_second": 8.713, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 144.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4574, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 147.37, | |
| "eval_loss": 3.169921875, | |
| "eval_runtime": 3.7886, | |
| "eval_samples_per_second": 68.1, | |
| "eval_steps_per_second": 8.71, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 152.63, | |
| "eval_loss": 3.2265625, | |
| "eval_runtime": 3.7924, | |
| "eval_samples_per_second": 68.03, | |
| "eval_steps_per_second": 8.702, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 157.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3848, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 157.89, | |
| "eval_loss": 3.291015625, | |
| "eval_runtime": 3.7859, | |
| "eval_samples_per_second": 68.148, | |
| "eval_steps_per_second": 8.717, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 163.16, | |
| "eval_loss": 3.376953125, | |
| "eval_runtime": 3.7886, | |
| "eval_samples_per_second": 68.099, | |
| "eval_steps_per_second": 8.71, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 168.42, | |
| "eval_loss": 3.408203125, | |
| "eval_runtime": 3.7885, | |
| "eval_samples_per_second": 68.1, | |
| "eval_steps_per_second": 8.71, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 171.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3224, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 173.68, | |
| "eval_loss": 3.4765625, | |
| "eval_runtime": 3.7922, | |
| "eval_samples_per_second": 68.034, | |
| "eval_steps_per_second": 8.702, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 178.95, | |
| "eval_loss": 3.529296875, | |
| "eval_runtime": 3.7898, | |
| "eval_samples_per_second": 68.077, | |
| "eval_steps_per_second": 8.708, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 184.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2697, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 184.21, | |
| "eval_loss": 3.591796875, | |
| "eval_runtime": 3.7854, | |
| "eval_samples_per_second": 68.157, | |
| "eval_steps_per_second": 8.718, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 189.47, | |
| "eval_loss": 3.634765625, | |
| "eval_runtime": 3.7918, | |
| "eval_samples_per_second": 68.041, | |
| "eval_steps_per_second": 8.703, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 194.74, | |
| "eval_loss": 3.68359375, | |
| "eval_runtime": 3.7891, | |
| "eval_samples_per_second": 68.09, | |
| "eval_steps_per_second": 8.709, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 197.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2258, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "eval_loss": 3.7265625, | |
| "eval_runtime": 3.7895, | |
| "eval_samples_per_second": 68.083, | |
| "eval_steps_per_second": 8.708, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 205.26, | |
| "eval_loss": 3.79296875, | |
| "eval_runtime": 3.7901, | |
| "eval_samples_per_second": 68.073, | |
| "eval_steps_per_second": 8.707, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 210.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1893, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 210.53, | |
| "eval_loss": 3.828125, | |
| "eval_runtime": 3.7891, | |
| "eval_samples_per_second": 68.09, | |
| "eval_steps_per_second": 8.709, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 215.79, | |
| "eval_loss": 3.880859375, | |
| "eval_runtime": 3.7907, | |
| "eval_samples_per_second": 68.062, | |
| "eval_steps_per_second": 8.706, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 221.05, | |
| "eval_loss": 3.923828125, | |
| "eval_runtime": 3.7895, | |
| "eval_samples_per_second": 68.082, | |
| "eval_steps_per_second": 8.708, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 223.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1602, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 226.32, | |
| "eval_loss": 3.974609375, | |
| "eval_runtime": 3.7894, | |
| "eval_samples_per_second": 68.084, | |
| "eval_steps_per_second": 8.708, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 231.58, | |
| "eval_loss": 4.00390625, | |
| "eval_runtime": 3.7923, | |
| "eval_samples_per_second": 68.032, | |
| "eval_steps_per_second": 8.702, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 236.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.137, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 236.84, | |
| "eval_loss": 4.046875, | |
| "eval_runtime": 3.7922, | |
| "eval_samples_per_second": 68.034, | |
| "eval_steps_per_second": 8.702, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 242.11, | |
| "eval_loss": 4.07421875, | |
| "eval_runtime": 3.7901, | |
| "eval_samples_per_second": 68.072, | |
| "eval_steps_per_second": 8.707, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 247.37, | |
| "eval_loss": 4.12109375, | |
| "eval_runtime": 3.7896, | |
| "eval_samples_per_second": 68.08, | |
| "eval_steps_per_second": 8.708, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 250.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1179, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 252.63, | |
| "eval_loss": 4.15625, | |
| "eval_runtime": 3.7912, | |
| "eval_samples_per_second": 68.053, | |
| "eval_steps_per_second": 8.704, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 257.89, | |
| "eval_loss": 4.203125, | |
| "eval_runtime": 3.7923, | |
| "eval_samples_per_second": 68.032, | |
| "eval_steps_per_second": 8.702, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 263.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1024, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 263.16, | |
| "eval_loss": 4.234375, | |
| "eval_runtime": 3.7852, | |
| "eval_samples_per_second": 68.159, | |
| "eval_steps_per_second": 8.718, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 268.42, | |
| "eval_loss": 4.2734375, | |
| "eval_runtime": 3.7869, | |
| "eval_samples_per_second": 68.129, | |
| "eval_steps_per_second": 8.714, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 273.68, | |
| "eval_loss": 4.3046875, | |
| "eval_runtime": 3.7892, | |
| "eval_samples_per_second": 68.088, | |
| "eval_steps_per_second": 8.709, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 276.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0901, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 278.95, | |
| "eval_loss": 4.3125, | |
| "eval_runtime": 3.7869, | |
| "eval_samples_per_second": 68.129, | |
| "eval_steps_per_second": 8.714, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 284.21, | |
| "eval_loss": 4.375, | |
| "eval_runtime": 3.7872, | |
| "eval_samples_per_second": 68.125, | |
| "eval_steps_per_second": 8.714, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 289.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0796, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 289.47, | |
| "eval_loss": 4.390625, | |
| "eval_runtime": 3.7843, | |
| "eval_samples_per_second": 68.177, | |
| "eval_steps_per_second": 8.72, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 294.74, | |
| "eval_loss": 4.4375, | |
| "eval_runtime": 3.7881, | |
| "eval_samples_per_second": 68.107, | |
| "eval_steps_per_second": 8.711, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 300.0, | |
| "eval_loss": 4.453125, | |
| "eval_runtime": 3.7869, | |
| "eval_samples_per_second": 68.129, | |
| "eval_steps_per_second": 8.714, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 302.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0706, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 305.26, | |
| "eval_loss": 4.5078125, | |
| "eval_runtime": 3.7854, | |
| "eval_samples_per_second": 68.156, | |
| "eval_steps_per_second": 8.718, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 310.53, | |
| "eval_loss": 4.515625, | |
| "eval_runtime": 3.787, | |
| "eval_samples_per_second": 68.128, | |
| "eval_steps_per_second": 8.714, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 315.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0631, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 315.79, | |
| "eval_loss": 4.53515625, | |
| "eval_runtime": 3.7837, | |
| "eval_samples_per_second": 68.187, | |
| "eval_steps_per_second": 8.722, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 321.05, | |
| "eval_loss": 4.5859375, | |
| "eval_runtime": 3.7869, | |
| "eval_samples_per_second": 68.13, | |
| "eval_steps_per_second": 8.714, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 326.32, | |
| "eval_loss": 4.609375, | |
| "eval_runtime": 3.788, | |
| "eval_samples_per_second": 68.11, | |
| "eval_steps_per_second": 8.712, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 328.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0573, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 331.58, | |
| "eval_loss": 4.63671875, | |
| "eval_runtime": 3.7891, | |
| "eval_samples_per_second": 68.09, | |
| "eval_steps_per_second": 8.709, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 336.84, | |
| "eval_loss": 4.63671875, | |
| "eval_runtime": 3.7855, | |
| "eval_samples_per_second": 68.154, | |
| "eval_steps_per_second": 8.717, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 342.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0521, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 342.11, | |
| "eval_loss": 4.6640625, | |
| "eval_runtime": 3.7838, | |
| "eval_samples_per_second": 68.185, | |
| "eval_steps_per_second": 8.721, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 347.37, | |
| "eval_loss": 4.70703125, | |
| "eval_runtime": 3.7834, | |
| "eval_samples_per_second": 68.192, | |
| "eval_steps_per_second": 8.722, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 352.63, | |
| "eval_loss": 4.69921875, | |
| "eval_runtime": 3.789, | |
| "eval_samples_per_second": 68.092, | |
| "eval_steps_per_second": 8.709, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 355.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0475, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 357.89, | |
| "eval_loss": 4.75390625, | |
| "eval_runtime": 3.7901, | |
| "eval_samples_per_second": 68.073, | |
| "eval_steps_per_second": 8.707, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 363.16, | |
| "eval_loss": 4.765625, | |
| "eval_runtime": 3.7877, | |
| "eval_samples_per_second": 68.116, | |
| "eval_steps_per_second": 8.712, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 368.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0437, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 368.42, | |
| "eval_loss": 4.80078125, | |
| "eval_runtime": 3.7858, | |
| "eval_samples_per_second": 68.15, | |
| "eval_steps_per_second": 8.717, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 373.68, | |
| "eval_loss": 4.83203125, | |
| "eval_runtime": 3.7888, | |
| "eval_samples_per_second": 68.095, | |
| "eval_steps_per_second": 8.71, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 378.95, | |
| "eval_loss": 4.8515625, | |
| "eval_runtime": 3.7901, | |
| "eval_samples_per_second": 68.073, | |
| "eval_steps_per_second": 8.707, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 381.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0399, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 384.21, | |
| "eval_loss": 4.86328125, | |
| "eval_runtime": 3.7938, | |
| "eval_samples_per_second": 68.006, | |
| "eval_steps_per_second": 8.698, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 389.47, | |
| "eval_loss": 4.89453125, | |
| "eval_runtime": 3.7887, | |
| "eval_samples_per_second": 68.098, | |
| "eval_steps_per_second": 8.71, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 394.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0367, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 394.74, | |
| "eval_loss": 4.90625, | |
| "eval_runtime": 3.7864, | |
| "eval_samples_per_second": 68.138, | |
| "eval_steps_per_second": 8.715, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 400.0, | |
| "eval_loss": 4.94140625, | |
| "eval_runtime": 3.791, | |
| "eval_samples_per_second": 68.057, | |
| "eval_steps_per_second": 8.705, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 400.0, | |
| "step": 15200, | |
| "total_flos": 1.2604727427386573e+17, | |
| "train_loss": 0.4328666927939967, | |
| "train_runtime": 22235.2418, | |
| "train_samples_per_second": 10.848, | |
| "train_steps_per_second": 0.684 | |
| } | |
| ], | |
| "max_steps": 15200, | |
| "num_train_epochs": 400, | |
| "total_flos": 1.2604727427386573e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |