| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 99.9971346704871, | |
| "global_step": 17400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.15, | |
| "eval_loss": 3.23046875, | |
| "eval_runtime": 11.7347, | |
| "eval_samples_per_second": 68.259, | |
| "eval_steps_per_second": 8.607, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_loss": 3.255859375, | |
| "eval_runtime": 11.7326, | |
| "eval_samples_per_second": 68.271, | |
| "eval_steps_per_second": 8.608, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0922, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "eval_loss": 3.279296875, | |
| "eval_runtime": 11.7456, | |
| "eval_samples_per_second": 68.196, | |
| "eval_steps_per_second": 8.599, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "eval_loss": 3.294921875, | |
| "eval_runtime": 11.7533, | |
| "eval_samples_per_second": 68.151, | |
| "eval_steps_per_second": 8.593, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.931, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "eval_loss": 3.310546875, | |
| "eval_runtime": 11.7529, | |
| "eval_samples_per_second": 68.153, | |
| "eval_steps_per_second": 8.594, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "eval_loss": 3.30078125, | |
| "eval_runtime": 11.76, | |
| "eval_samples_per_second": 68.112, | |
| "eval_steps_per_second": 8.588, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "eval_loss": 3.3359375, | |
| "eval_runtime": 11.7654, | |
| "eval_samples_per_second": 68.081, | |
| "eval_steps_per_second": 8.584, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.8589, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "eval_loss": 3.341796875, | |
| "eval_runtime": 11.7717, | |
| "eval_samples_per_second": 68.045, | |
| "eval_steps_per_second": 8.58, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 10.34, | |
| "eval_loss": 3.353515625, | |
| "eval_runtime": 11.7648, | |
| "eval_samples_per_second": 68.085, | |
| "eval_steps_per_second": 8.585, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 11.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7982, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 11.49, | |
| "eval_loss": 3.373046875, | |
| "eval_runtime": 11.771, | |
| "eval_samples_per_second": 68.049, | |
| "eval_steps_per_second": 8.58, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 12.64, | |
| "eval_loss": 3.37109375, | |
| "eval_runtime": 11.7674, | |
| "eval_samples_per_second": 68.069, | |
| "eval_steps_per_second": 8.583, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "eval_loss": 3.40625, | |
| "eval_runtime": 11.7675, | |
| "eval_samples_per_second": 68.069, | |
| "eval_steps_per_second": 8.583, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7446, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 14.94, | |
| "eval_loss": 3.4140625, | |
| "eval_runtime": 11.7686, | |
| "eval_samples_per_second": 68.062, | |
| "eval_steps_per_second": 8.582, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 16.09, | |
| "eval_loss": 3.45703125, | |
| "eval_runtime": 11.7612, | |
| "eval_samples_per_second": 68.105, | |
| "eval_steps_per_second": 8.588, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 17.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6947, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 17.24, | |
| "eval_loss": 3.498046875, | |
| "eval_runtime": 11.7651, | |
| "eval_samples_per_second": 68.082, | |
| "eval_steps_per_second": 8.585, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 18.39, | |
| "eval_loss": 3.51171875, | |
| "eval_runtime": 11.7624, | |
| "eval_samples_per_second": 68.098, | |
| "eval_steps_per_second": 8.587, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 19.54, | |
| "eval_loss": 3.533203125, | |
| "eval_runtime": 11.7616, | |
| "eval_samples_per_second": 68.103, | |
| "eval_steps_per_second": 8.587, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 20.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6464, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 20.69, | |
| "eval_loss": 3.55078125, | |
| "eval_runtime": 11.7586, | |
| "eval_samples_per_second": 68.121, | |
| "eval_steps_per_second": 8.589, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 21.84, | |
| "eval_loss": 3.580078125, | |
| "eval_runtime": 11.772, | |
| "eval_samples_per_second": 68.043, | |
| "eval_steps_per_second": 8.58, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5981, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "eval_loss": 3.6015625, | |
| "eval_runtime": 11.7661, | |
| "eval_samples_per_second": 68.077, | |
| "eval_steps_per_second": 8.584, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 24.14, | |
| "eval_loss": 3.6796875, | |
| "eval_runtime": 11.7648, | |
| "eval_samples_per_second": 68.085, | |
| "eval_steps_per_second": 8.585, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 25.29, | |
| "eval_loss": 3.7265625, | |
| "eval_runtime": 11.7655, | |
| "eval_samples_per_second": 68.08, | |
| "eval_steps_per_second": 8.584, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 25.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5539, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 26.44, | |
| "eval_loss": 3.736328125, | |
| "eval_runtime": 11.7698, | |
| "eval_samples_per_second": 68.056, | |
| "eval_steps_per_second": 8.581, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 27.58, | |
| "eval_loss": 3.755859375, | |
| "eval_runtime": 11.7676, | |
| "eval_samples_per_second": 68.068, | |
| "eval_steps_per_second": 8.583, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 28.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5105, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 28.73, | |
| "eval_loss": 3.79296875, | |
| "eval_runtime": 11.7657, | |
| "eval_samples_per_second": 68.079, | |
| "eval_steps_per_second": 8.584, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 29.88, | |
| "eval_loss": 3.859375, | |
| "eval_runtime": 11.7702, | |
| "eval_samples_per_second": 68.053, | |
| "eval_steps_per_second": 8.581, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 31.03, | |
| "eval_loss": 3.951171875, | |
| "eval_runtime": 11.7682, | |
| "eval_samples_per_second": 68.065, | |
| "eval_steps_per_second": 8.582, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 31.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4699, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 32.18, | |
| "eval_loss": 3.953125, | |
| "eval_runtime": 11.768, | |
| "eval_samples_per_second": 68.066, | |
| "eval_steps_per_second": 8.583, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 33.33, | |
| "eval_loss": 3.95703125, | |
| "eval_runtime": 11.7717, | |
| "eval_samples_per_second": 68.044, | |
| "eval_steps_per_second": 8.58, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 34.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4317, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 34.48, | |
| "eval_loss": 4.0234375, | |
| "eval_runtime": 11.7662, | |
| "eval_samples_per_second": 68.076, | |
| "eval_steps_per_second": 8.584, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 35.63, | |
| "eval_loss": 4.0859375, | |
| "eval_runtime": 11.7718, | |
| "eval_samples_per_second": 68.044, | |
| "eval_steps_per_second": 8.58, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 36.78, | |
| "eval_loss": 4.09765625, | |
| "eval_runtime": 11.7694, | |
| "eval_samples_per_second": 68.058, | |
| "eval_steps_per_second": 8.582, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 37.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3947, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 37.93, | |
| "eval_loss": 4.11328125, | |
| "eval_runtime": 11.769, | |
| "eval_samples_per_second": 68.06, | |
| "eval_steps_per_second": 8.582, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 39.08, | |
| "eval_loss": 4.1796875, | |
| "eval_runtime": 11.7687, | |
| "eval_samples_per_second": 68.062, | |
| "eval_steps_per_second": 8.582, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 40.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3589, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 40.23, | |
| "eval_loss": 4.20703125, | |
| "eval_runtime": 11.7645, | |
| "eval_samples_per_second": 68.086, | |
| "eval_steps_per_second": 8.585, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 41.38, | |
| "eval_loss": 4.2734375, | |
| "eval_runtime": 11.7699, | |
| "eval_samples_per_second": 68.055, | |
| "eval_steps_per_second": 8.581, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 42.53, | |
| "eval_loss": 4.29296875, | |
| "eval_runtime": 11.7707, | |
| "eval_samples_per_second": 68.05, | |
| "eval_steps_per_second": 8.581, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 43.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3248, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 43.68, | |
| "eval_loss": 4.30859375, | |
| "eval_runtime": 11.7694, | |
| "eval_samples_per_second": 68.058, | |
| "eval_steps_per_second": 8.582, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 44.83, | |
| "eval_loss": 4.34765625, | |
| "eval_runtime": 11.7695, | |
| "eval_samples_per_second": 68.057, | |
| "eval_steps_per_second": 8.582, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 45.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2899, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 45.97, | |
| "eval_loss": 4.375, | |
| "eval_runtime": 11.7703, | |
| "eval_samples_per_second": 68.053, | |
| "eval_steps_per_second": 8.581, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 47.13, | |
| "eval_loss": 4.4609375, | |
| "eval_runtime": 11.7682, | |
| "eval_samples_per_second": 68.065, | |
| "eval_steps_per_second": 8.582, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 48.28, | |
| "eval_loss": 4.47265625, | |
| "eval_runtime": 11.77, | |
| "eval_samples_per_second": 68.054, | |
| "eval_steps_per_second": 8.581, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 48.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2585, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 49.42, | |
| "eval_loss": 4.5546875, | |
| "eval_runtime": 11.7706, | |
| "eval_samples_per_second": 68.051, | |
| "eval_steps_per_second": 8.581, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 50.57, | |
| "eval_loss": 4.54296875, | |
| "eval_runtime": 11.7686, | |
| "eval_samples_per_second": 68.062, | |
| "eval_steps_per_second": 8.582, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 51.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2273, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 51.72, | |
| "eval_loss": 4.55859375, | |
| "eval_runtime": 11.7687, | |
| "eval_samples_per_second": 68.062, | |
| "eval_steps_per_second": 8.582, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 52.87, | |
| "eval_loss": 4.58984375, | |
| "eval_runtime": 11.7666, | |
| "eval_samples_per_second": 68.074, | |
| "eval_steps_per_second": 8.584, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 54.02, | |
| "eval_loss": 4.62109375, | |
| "eval_runtime": 11.7682, | |
| "eval_samples_per_second": 68.065, | |
| "eval_steps_per_second": 8.582, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 54.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1995, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 55.17, | |
| "eval_loss": 4.671875, | |
| "eval_runtime": 11.7698, | |
| "eval_samples_per_second": 68.056, | |
| "eval_steps_per_second": 8.581, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 56.32, | |
| "eval_loss": 4.71875, | |
| "eval_runtime": 11.7677, | |
| "eval_samples_per_second": 68.068, | |
| "eval_steps_per_second": 8.583, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 57.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1713, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 57.47, | |
| "eval_loss": 4.72265625, | |
| "eval_runtime": 11.7696, | |
| "eval_samples_per_second": 68.057, | |
| "eval_steps_per_second": 8.581, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 58.62, | |
| "eval_loss": 4.7578125, | |
| "eval_runtime": 11.7712, | |
| "eval_samples_per_second": 68.047, | |
| "eval_steps_per_second": 8.58, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 59.77, | |
| "eval_loss": 4.77734375, | |
| "eval_runtime": 11.7676, | |
| "eval_samples_per_second": 68.068, | |
| "eval_steps_per_second": 8.583, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 60.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.144, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 60.92, | |
| "eval_loss": 4.78515625, | |
| "eval_runtime": 11.7665, | |
| "eval_samples_per_second": 68.075, | |
| "eval_steps_per_second": 8.584, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 62.07, | |
| "eval_loss": 4.859375, | |
| "eval_runtime": 11.7665, | |
| "eval_samples_per_second": 68.074, | |
| "eval_steps_per_second": 8.584, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 63.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1196, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 63.22, | |
| "eval_loss": 4.859375, | |
| "eval_runtime": 11.7664, | |
| "eval_samples_per_second": 68.075, | |
| "eval_steps_per_second": 8.584, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 64.37, | |
| "eval_loss": 4.90234375, | |
| "eval_runtime": 11.767, | |
| "eval_samples_per_second": 68.072, | |
| "eval_steps_per_second": 8.583, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 65.52, | |
| "eval_loss": 4.921875, | |
| "eval_runtime": 11.7663, | |
| "eval_samples_per_second": 68.076, | |
| "eval_steps_per_second": 8.584, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 66.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0945, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 66.66, | |
| "eval_loss": 4.94140625, | |
| "eval_runtime": 11.7257, | |
| "eval_samples_per_second": 68.312, | |
| "eval_steps_per_second": 8.614, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 67.81, | |
| "eval_loss": 4.97265625, | |
| "eval_runtime": 11.7376, | |
| "eval_samples_per_second": 68.242, | |
| "eval_steps_per_second": 8.605, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 68.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0698, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 68.96, | |
| "eval_loss": 4.98046875, | |
| "eval_runtime": 11.7405, | |
| "eval_samples_per_second": 68.226, | |
| "eval_steps_per_second": 8.603, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 70.11, | |
| "eval_loss": 5.0234375, | |
| "eval_runtime": 11.7527, | |
| "eval_samples_per_second": 68.155, | |
| "eval_steps_per_second": 8.594, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 71.26, | |
| "eval_loss": 5.0546875, | |
| "eval_runtime": 11.7518, | |
| "eval_samples_per_second": 68.16, | |
| "eval_steps_per_second": 8.594, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 71.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.047, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 72.41, | |
| "eval_loss": 5.0859375, | |
| "eval_runtime": 11.7522, | |
| "eval_samples_per_second": 68.157, | |
| "eval_steps_per_second": 8.594, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 73.56, | |
| "eval_loss": 5.109375, | |
| "eval_runtime": 11.7581, | |
| "eval_samples_per_second": 68.123, | |
| "eval_steps_per_second": 8.59, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 74.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0242, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 74.71, | |
| "eval_loss": 5.1328125, | |
| "eval_runtime": 11.7589, | |
| "eval_samples_per_second": 68.119, | |
| "eval_steps_per_second": 8.589, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 75.86, | |
| "eval_loss": 5.1484375, | |
| "eval_runtime": 11.7654, | |
| "eval_samples_per_second": 68.081, | |
| "eval_steps_per_second": 8.585, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 77.01, | |
| "eval_loss": 5.171875, | |
| "eval_runtime": 11.7655, | |
| "eval_samples_per_second": 68.08, | |
| "eval_steps_per_second": 8.584, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 77.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0042, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 78.16, | |
| "eval_loss": 5.21484375, | |
| "eval_runtime": 11.7554, | |
| "eval_samples_per_second": 68.139, | |
| "eval_steps_per_second": 8.592, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 79.31, | |
| "eval_loss": 5.25390625, | |
| "eval_runtime": 11.7677, | |
| "eval_samples_per_second": 68.068, | |
| "eval_steps_per_second": 8.583, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 80.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.983, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 80.46, | |
| "eval_loss": 5.2421875, | |
| "eval_runtime": 11.7633, | |
| "eval_samples_per_second": 68.093, | |
| "eval_steps_per_second": 8.586, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 81.61, | |
| "eval_loss": 5.27734375, | |
| "eval_runtime": 11.7597, | |
| "eval_samples_per_second": 68.114, | |
| "eval_steps_per_second": 8.589, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 82.76, | |
| "eval_loss": 5.28515625, | |
| "eval_runtime": 11.7676, | |
| "eval_samples_per_second": 68.068, | |
| "eval_steps_per_second": 8.583, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 83.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9641, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 83.91, | |
| "eval_loss": 5.32421875, | |
| "eval_runtime": 11.7617, | |
| "eval_samples_per_second": 68.102, | |
| "eval_steps_per_second": 8.587, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 85.06, | |
| "eval_loss": 5.375, | |
| "eval_runtime": 11.7644, | |
| "eval_samples_per_second": 68.087, | |
| "eval_steps_per_second": 8.585, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 86.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9448, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 86.21, | |
| "eval_loss": 5.38671875, | |
| "eval_runtime": 11.7479, | |
| "eval_samples_per_second": 68.182, | |
| "eval_steps_per_second": 8.597, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 87.36, | |
| "eval_loss": 5.40625, | |
| "eval_runtime": 11.7632, | |
| "eval_samples_per_second": 68.094, | |
| "eval_steps_per_second": 8.586, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 88.5, | |
| "eval_loss": 5.41796875, | |
| "eval_runtime": 11.7578, | |
| "eval_samples_per_second": 68.125, | |
| "eval_steps_per_second": 8.59, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 89.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9253, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 89.65, | |
| "eval_loss": 5.453125, | |
| "eval_runtime": 11.7633, | |
| "eval_samples_per_second": 68.093, | |
| "eval_steps_per_second": 8.586, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 90.8, | |
| "eval_loss": 5.44921875, | |
| "eval_runtime": 11.755, | |
| "eval_samples_per_second": 68.141, | |
| "eval_steps_per_second": 8.592, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 91.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.907, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 91.95, | |
| "eval_loss": 5.484375, | |
| "eval_runtime": 11.7719, | |
| "eval_samples_per_second": 68.044, | |
| "eval_steps_per_second": 8.58, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 93.1, | |
| "eval_loss": 5.54296875, | |
| "eval_runtime": 11.7524, | |
| "eval_samples_per_second": 68.157, | |
| "eval_steps_per_second": 8.594, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 94.25, | |
| "eval_loss": 5.56640625, | |
| "eval_runtime": 11.762, | |
| "eval_samples_per_second": 68.1, | |
| "eval_steps_per_second": 8.587, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 94.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8889, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 95.4, | |
| "eval_loss": 5.58984375, | |
| "eval_runtime": 11.7505, | |
| "eval_samples_per_second": 68.167, | |
| "eval_steps_per_second": 8.595, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 96.55, | |
| "eval_loss": 5.59375, | |
| "eval_runtime": 11.7537, | |
| "eval_samples_per_second": 68.149, | |
| "eval_steps_per_second": 8.593, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 97.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8717, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 97.7, | |
| "eval_loss": 5.6328125, | |
| "eval_runtime": 11.7619, | |
| "eval_samples_per_second": 68.101, | |
| "eval_steps_per_second": 8.587, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 98.85, | |
| "eval_loss": 5.64453125, | |
| "eval_runtime": 11.7552, | |
| "eval_samples_per_second": 68.14, | |
| "eval_steps_per_second": 8.592, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 5.671875, | |
| "eval_runtime": 11.7611, | |
| "eval_samples_per_second": 68.106, | |
| "eval_steps_per_second": 8.588, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "step": 17400, | |
| "total_flos": 2.1345852491772723e+17, | |
| "train_loss": 0.3251776333512931, | |
| "train_runtime": 8837.2329, | |
| "train_samples_per_second": 31.594, | |
| "train_steps_per_second": 1.969 | |
| } | |
| ], | |
| "max_steps": 17400, | |
| "num_train_epochs": 100, | |
| "total_flos": 2.1345852491772723e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |