| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 12.548024703460445, | |
| "global_step": 160000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0248, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.8708, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.8434, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7994, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.8287, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.8163, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.8126, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7973, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7749, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7852, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7927, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7736, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7792, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7559, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7508, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7809, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.767, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7906, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7622, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7726, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7597, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7649, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7352, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7408, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7453, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7417, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7524, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7461, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7584, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7485, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7532, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7221, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7351, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7286, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.724, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.711, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7233, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7075, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7264, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7189, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7265, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7145, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7188, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7173, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7454, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7071, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7312, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6972, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.717, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7042, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7075, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7037, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.689, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7099, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7175, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6906, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7112, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7183, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7158, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6972, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6869, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6896, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6895, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7044, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7103, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6788, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7091, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7046, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6683, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7084, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6933, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6533, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6701, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6786, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6886, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6805, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6651, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.666, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6671, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6979, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6823, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6944, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6571, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.7135, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.67, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6908, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.665, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6862, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6903, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6551, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6588, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6589, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.671, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6702, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6641, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6681, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6798, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6762, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6781, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_gen_len": 18.729231040875785, | |
| "eval_loss": 1.5352756977081299, | |
| "eval_rouge1": 38.2416, | |
| "eval_rouge2": 16.0401, | |
| "eval_rougeL": 31.3511, | |
| "eval_rougeLsum": 31.3586, | |
| "eval_runtime": 2357.8446, | |
| "eval_samples_per_second": 4.804, | |
| "eval_steps_per_second": 1.201, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6725, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6569, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.631, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6549, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6714, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6827, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6725, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6517, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.647, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6536, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6545, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6697, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6329, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6601, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.665, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6462, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6512, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.673, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6818, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6698, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6772, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6526, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6561, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6429, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6517, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6766, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6708, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6229, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5406, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5325, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5196, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5283, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5463, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5461, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5496, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5646, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5527, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.551, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5566, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5514, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5425, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5321, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5552, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5316, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5271, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5331, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5428, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5534, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5306, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5468, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.529, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5349, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5596, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5328, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5495, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5447, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5442, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5435, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5588, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5632, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5504, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5148, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5523, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.544, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5472, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5294, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5458, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5579, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5524, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5455, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5549, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5457, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5378, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5422, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5559, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5463, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5385, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5487, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5535, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5348, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5555, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5378, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5212, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5559, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5353, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5461, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5407, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5359, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5561, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5274, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5544, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5407, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5374, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5572, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5177, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5504, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5611, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5362, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5411, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5473, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_gen_len": 18.753509314028427, | |
| "eval_loss": 1.5065853595733643, | |
| "eval_rouge1": 38.5975, | |
| "eval_rouge2": 16.6862, | |
| "eval_rougeL": 31.8033, | |
| "eval_rougeLsum": 31.8084, | |
| "eval_runtime": 2373.7069, | |
| "eval_samples_per_second": 4.772, | |
| "eval_steps_per_second": 1.193, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5304, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.55, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5375, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5366, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5254, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5369, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5455, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5337, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5411, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5416, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5477, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5276, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5643, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.558, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5548, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5486, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.551, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5494, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5526, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5294, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5288, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5093, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5276, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5321, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5655, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5438, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5507, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5506, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5377, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5447, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5361, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5568, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5498, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5405, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5198, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5227, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5163, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5811, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5303, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5416, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5296, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5521, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5387, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5311, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5539, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.53, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5588, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5402, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5269, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5407, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5141, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.523, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5112, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5314, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.5345, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4314, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.445, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4215, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4116, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4264, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4238, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4172, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4319, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4174, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4293, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4393, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4474, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4227, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4232, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4294, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4118, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4277, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4171, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4413, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4119, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3893, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4255, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4221, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4339, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4394, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4249, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4458, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4323, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4167, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.431, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4568, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4716, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4147, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.427, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4643, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4436, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4321, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4464, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4052, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4278, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.401, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4458, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4517, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4541, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4334, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_gen_len": 18.717400900503222, | |
| "eval_loss": 1.4981415271759033, | |
| "eval_rouge1": 39.0553, | |
| "eval_rouge2": 17.2294, | |
| "eval_rougeL": 32.3778, | |
| "eval_rougeLsum": 32.3837, | |
| "eval_runtime": 2349.3062, | |
| "eval_samples_per_second": 4.821, | |
| "eval_steps_per_second": 1.205, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4209, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.401, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4375, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4383, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4378, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.444, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.432, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4397, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4196, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4123, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4194, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4466, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4358, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4641, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4346, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4372, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4128, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4267, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4233, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4357, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4127, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4324, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4501, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.426, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.445, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4226, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4569, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4501, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4113, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4629, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4288, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4311, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4409, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4597, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4401, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4155, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.438, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4159, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4377, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4474, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4338, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4747, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.475, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4491, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4372, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.455, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4535, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4649, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.457, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4422, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.431, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4279, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4487, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.456, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4208, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4449, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4324, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4399, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4257, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.457, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4549, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4525, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4587, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4319, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4267, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4644, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4477, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4342, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4348, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4384, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4432, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4382, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4454, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4196, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4476, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4554, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4315, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4471, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4523, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4562, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4413, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4608, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4121, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3319, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3408, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3191, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.326, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3219, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.333, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3286, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3439, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3256, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3363, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3229, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3285, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3242, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.329, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3486, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3239, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3582, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "eval_gen_len": 18.757482122362497, | |
| "eval_loss": 1.4984853267669678, | |
| "eval_rouge1": 39.0472, | |
| "eval_rouge2": 17.2043, | |
| "eval_rougeL": 32.2958, | |
| "eval_rougeLsum": 32.2919, | |
| "eval_runtime": 2351.4068, | |
| "eval_samples_per_second": 4.817, | |
| "eval_steps_per_second": 1.204, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.348, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3358, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3307, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3387, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3412, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3391, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.367, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3217, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3407, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3321, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3565, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.33, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3402, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3641, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3589, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.329, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3262, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3384, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3475, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3532, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3395, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3357, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3364, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.351, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3656, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3651, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3317, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3492, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3497, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3523, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3484, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3495, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3633, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3578, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.334, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3591, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.334, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3484, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3557, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3453, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.342, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3449, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3392, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3421, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3444, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3492, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3575, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3487, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3432, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3119, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3624, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3454, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3507, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3481, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3822, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3479, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3556, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3584, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3583, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3644, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3376, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3345, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3796, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3425, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3434, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.382, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3829, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3671, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3561, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3528, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3359, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3458, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3547, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3579, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3383, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3555, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3702, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3472, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3567, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3464, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3709, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3677, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3653, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3726, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3684, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3459, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3812, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3432, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3573, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.363, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3744, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3751, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3629, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3549, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3704, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3743, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3361, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3684, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3545, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3716, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "eval_gen_len": 18.721197139578, | |
| "eval_loss": 1.4834269285202026, | |
| "eval_rouge1": 39.5208, | |
| "eval_rouge2": 17.6414, | |
| "eval_rougeL": 32.8518, | |
| "eval_rougeLsum": 32.8448, | |
| "eval_runtime": 2347.5327, | |
| "eval_samples_per_second": 4.825, | |
| "eval_steps_per_second": 1.206, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.355, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3659, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3819, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3587, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3802, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3681, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3567, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3517, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3769, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3688, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2497, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2667, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2817, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2461, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2406, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2409, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2674, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2651, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2241, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2928, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2599, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2408, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2664, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2434, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2654, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2663, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2511, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2763, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2623, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2667, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2627, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2648, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2609, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2636, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2592, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2517, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2881, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.254, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2749, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2544, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2681, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2929, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.273, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2709, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2504, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2867, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2771, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.257, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.287, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2629, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2612, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2433, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2655, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2762, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2714, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2821, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.265, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2547, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2555, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2784, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2729, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2978, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2682, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2884, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2571, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2852, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2858, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2736, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2969, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2767, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2777, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2589, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2571, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2631, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2696, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2654, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2738, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2798, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.301, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2678, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2659, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2779, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2936, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2843, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2633, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2969, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2624, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2952, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2845, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2759, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2948, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2547, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2721, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2678, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2821, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2667, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2791, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.284, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2932, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3006, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "eval_gen_len": 18.713604661428445, | |
| "eval_loss": 1.4940598011016846, | |
| "eval_rouge1": 39.3973, | |
| "eval_rouge2": 17.7088, | |
| "eval_rougeL": 32.7938, | |
| "eval_rougeLsum": 32.7949, | |
| "eval_runtime": 2600.1572, | |
| "eval_samples_per_second": 4.356, | |
| "eval_steps_per_second": 1.089, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2774, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.296, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3068, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2759, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2824, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3045, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2766, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2674, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2793, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.284, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2861, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2924, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2949, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2945, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2832, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2674, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2703, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2977, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2994, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2934, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2885, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2849, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3009, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.284, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2807, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2911, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2881, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2812, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2934, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.284, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3022, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2916, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3006, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3157, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2868, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2879, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2873, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2625, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1801, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.178, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1779, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1858, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1579, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1974, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1938, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1938, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2015, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1884, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1764, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1796, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1815, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2015, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1862, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2127, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1809, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1978, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1866, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1646, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2152, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1896, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1779, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1951, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2209, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2072, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1887, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2142, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.157, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2097, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2041, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1877, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1838, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1936, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2084, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2174, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2014, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2259, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2031, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2046, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2045, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2016, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1902, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1769, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1939, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1984, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1975, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2162, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.219, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1927, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2096, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2227, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2265, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2238, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2066, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2013, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1938, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2008, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2239, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2272, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2162, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2137, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "eval_gen_len": 18.72711220976428, | |
| "eval_loss": 1.511597752571106, | |
| "eval_rouge1": 39.4402, | |
| "eval_rouge2": 17.7009, | |
| "eval_rougeL": 32.8481, | |
| "eval_rougeLsum": 32.8525, | |
| "eval_runtime": 2553.8215, | |
| "eval_samples_per_second": 4.435, | |
| "eval_steps_per_second": 1.109, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2279, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2261, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2069, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2134, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.215, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.22, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2227, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2098, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2228, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2188, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2287, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2088, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2171, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2172, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2379, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2229, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2165, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2165, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2346, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.224, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2332, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2139, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2102, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.202, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2048, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2215, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2297, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2171, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2257, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2091, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2063, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1986, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.222, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2118, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2252, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2296, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2421, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.24, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2032, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2204, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2147, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2317, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2055, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2386, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.227, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2341, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2191, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2396, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2343, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2257, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2295, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2128, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2317, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2195, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2275, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2202, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2161, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2215, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2285, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.228, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2368, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2231, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.213, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.233, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2155, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1235, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1247, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1164, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1156, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1105, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.107, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1293, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1294, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1274, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1346, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1089, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1235, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1154, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1244, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1424, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1417, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1393, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1147, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1396, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1564, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1457, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1349, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1724, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1388, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1393, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1325, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1315, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1428, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.148, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1534, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1454, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1383, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1266, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1382, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1169, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "eval_gen_len": 18.758806391807187, | |
| "eval_loss": 1.5334253311157227, | |
| "eval_rouge1": 39.579, | |
| "eval_rouge2": 17.841, | |
| "eval_rougeL": 32.8832, | |
| "eval_rougeLsum": 32.8889, | |
| "eval_runtime": 2444.6143, | |
| "eval_samples_per_second": 4.633, | |
| "eval_steps_per_second": 1.158, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1758, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1429, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1347, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.149, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1293, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1366, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1534, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1561, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1476, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1491, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.136, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1254, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1536, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.13, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1626, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1309, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1467, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1761, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1415, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1515, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1396, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1501, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1594, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1486, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1459, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1682, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1647, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1347, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1628, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.129, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1791, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1582, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.157, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1459, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.15, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1235, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.142, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1587, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1564, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1457, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1434, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1512, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1666, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1624, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1629, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1617, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1573, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1666, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1675, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1446, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1584, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1524, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1391, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1488, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1671, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1627, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1586, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1468, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1746, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1674, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.148, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1649, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1567, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1611, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1541, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1589, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1942, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1697, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1645, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1638, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1851, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1837, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1693, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1633, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1601, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1706, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1805, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1802, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1563, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.182, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1532, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1903, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.163, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1692, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1884, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1697, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.183, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1736, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1652, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1804, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1731, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1745, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1464, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0755, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0766, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.068, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0792, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0746, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0775, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0757, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "eval_gen_len": 18.725523086430652, | |
| "eval_loss": 1.5569835901260376, | |
| "eval_rouge1": 39.4694, | |
| "eval_rouge2": 17.8419, | |
| "eval_rougeL": 32.87, | |
| "eval_rougeLsum": 32.864, | |
| "eval_runtime": 2362.234, | |
| "eval_samples_per_second": 4.795, | |
| "eval_steps_per_second": 1.199, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0726, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0736, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0776, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0533, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0583, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0816, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0684, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0656, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0685, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1002, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0789, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0738, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0764, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0733, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0984, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0767, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0991, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0882, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0868, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0789, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0887, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0777, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0868, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0971, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0849, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0796, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0923, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0929, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0868, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0987, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0891, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0812, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.087, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0809, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0906, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.104, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0869, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.074, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.091, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0797, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0966, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0721, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0829, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0853, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0875, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0935, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1017, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0981, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0926, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0902, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1018, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0786, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1005, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.094, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1042, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0926, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1012, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1069, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0873, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0865, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1074, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0932, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.061, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1158, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0995, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0997, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1066, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.089, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0987, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0963, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0996, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0909, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1201, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1071, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1108, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0933, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1054, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1041, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1092, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1093, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1123, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1099, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1093, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1016, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1115, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0945, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1069, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1253, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1174, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1218, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1059, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1036, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1115, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1155, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1088, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1035, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1217, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.114, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1107, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0901, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "eval_gen_len": 18.753509314028427, | |
| "eval_loss": 1.5419243574142456, | |
| "eval_rouge1": 39.5689, | |
| "eval_rouge2": 17.9809, | |
| "eval_rougeL": 33.0274, | |
| "eval_rougeLsum": 33.0285, | |
| "eval_runtime": 2388.4938, | |
| "eval_samples_per_second": 4.742, | |
| "eval_steps_per_second": 1.186, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1357, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1203, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1088, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1058, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1042, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1145, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1221, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0963, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1021, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1095, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1184, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1181, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1212, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1323, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1228, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.123, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1024, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1225, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0943, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.1048, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0375, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0024, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0088, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9942, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0172, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0192, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 5e-05, | |
| "loss": 1.016, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0227, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0317, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0192, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0271, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0337, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 5e-05, | |
| "loss": 1.017, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0143, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0128, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0181, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0114, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0279, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0109, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0197, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0286, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 5e-05, | |
| "loss": 1.037, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 5e-05, | |
| "loss": 1.021, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0114, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0398, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0266, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0313, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0181, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0293, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0336, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0231, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.039, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0291, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0375, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0385, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0182, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0498, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0368, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0403, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0292, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0306, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 5e-05, | |
| "loss": 1.038, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0445, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0541, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0281, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0506, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0351, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0396, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0616, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0317, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0462, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0388, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0529, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0231, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0487, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0489, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.033, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0567, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0471, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0462, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0405, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0485, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0537, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0363, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0451, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0405, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0504, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0418, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0605, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0347, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0526, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0434, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0608, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0426, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0462, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0283, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0405, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0557, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.042, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0506, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "eval_gen_len": 18.76313233865984, | |
| "eval_loss": 1.561972737312317, | |
| "eval_rouge1": 39.6409, | |
| "eval_rouge2": 18.0941, | |
| "eval_rougeL": 33.1012, | |
| "eval_rougeLsum": 33.0983, | |
| "eval_runtime": 2370.4984, | |
| "eval_samples_per_second": 4.778, | |
| "eval_steps_per_second": 1.195, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0483, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0626, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0567, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0455, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0634, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0537, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0522, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0724, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0534, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0407, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0354, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0423, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0671, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0798, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0678, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.046, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0487, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0628, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0606, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0695, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0646, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0613, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0652, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.059, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0661, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0529, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0732, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0477, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0621, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0757, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 8.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0802, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0634, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0512, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0457, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0774, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.084, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0792, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0755, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0546, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0596, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0675, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0627, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0663, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0533, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0769, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0566, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0622, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.027, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9537, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9737, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9674, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9575, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.973, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9645, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9557, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9579, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9655, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9783, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9927, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9748, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9799, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9669, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0048, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9806, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9617, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9767, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9616, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9755, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9715, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9821, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9752, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.982, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.969, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9874, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9779, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9752, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9965, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9936, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9964, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9793, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9735, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9839, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9986, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9789, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9874, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9839, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9716, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9764, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.974, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9817, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9874, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9955, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9879, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0022, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9908, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 9.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9849, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9749, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9932, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9971, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9846, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "eval_gen_len": 18.769135693475764, | |
| "eval_loss": 1.6027214527130127, | |
| "eval_rouge1": 39.3134, | |
| "eval_rouge2": 17.9425, | |
| "eval_rougeL": 32.8563, | |
| "eval_rougeLsum": 32.8559, | |
| "eval_runtime": 2347.8962, | |
| "eval_samples_per_second": 4.824, | |
| "eval_steps_per_second": 1.206, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9855, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9865, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0009, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9908, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0213, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0078, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0009, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.987, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9886, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0088, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9998, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0107, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9858, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9944, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9938, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9932, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0013, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0077, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0019, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0033, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0015, | |
| "step": 122100 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0139, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0096, | |
| "step": 122300 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9902, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0116, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0116, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0046, | |
| "step": 122700 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 5e-05, | |
| "loss": 1.014, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0187, | |
| "step": 122900 | |
| }, | |
| { | |
| "epoch": 9.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.006, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 9.65, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0128, | |
| "step": 123100 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9984, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.995, | |
| "step": 123300 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0211, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.028, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0029, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 9.7, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0003, | |
| "step": 123700 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.991, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0048, | |
| "step": 123900 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0188, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0033, | |
| "step": 124100 | |
| }, | |
| { | |
| "epoch": 9.74, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0226, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0104, | |
| "step": 124300 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9867, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0037, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0097, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0213, | |
| "step": 124700 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0441, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9998, | |
| "step": 124900 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0209, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 5e-05, | |
| "loss": 1.012, | |
| "step": 125100 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0061, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0079, | |
| "step": 125300 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0104, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0232, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9992, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9957, | |
| "step": 125700 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0075, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0014, | |
| "step": 125900 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0205, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0097, | |
| "step": 126100 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0078, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0099, | |
| "step": 126300 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0139, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0171, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0505, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0137, | |
| "step": 126700 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0241, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0221, | |
| "step": 126900 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0107, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 5e-05, | |
| "loss": 1.022, | |
| "step": 127100 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0076, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0194, | |
| "step": 127300 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "learning_rate": 5e-05, | |
| "loss": 1.011, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.0128, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9376, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9231, | |
| "step": 127700 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9108, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9122, | |
| "step": 127900 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.918, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9231, | |
| "step": 128100 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9267, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9139, | |
| "step": 128300 | |
| }, | |
| { | |
| "epoch": 10.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9155, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.94, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.938, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9136, | |
| "step": 128700 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9205, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9269, | |
| "step": 128900 | |
| }, | |
| { | |
| "epoch": 10.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9215, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 10.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9153, | |
| "step": 129100 | |
| }, | |
| { | |
| "epoch": 10.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9522, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9342, | |
| "step": 129300 | |
| }, | |
| { | |
| "epoch": 10.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9349, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9316, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9214, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 10.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.931, | |
| "step": 129700 | |
| }, | |
| { | |
| "epoch": 10.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9418, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9534, | |
| "step": 129900 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9217, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "eval_gen_len": 18.793502251258055, | |
| "eval_loss": 1.6331114768981934, | |
| "eval_rouge1": 39.2465, | |
| "eval_rouge2": 17.7996, | |
| "eval_rougeL": 32.7625, | |
| "eval_rougeLsum": 32.7679, | |
| "eval_runtime": 2377.4093, | |
| "eval_samples_per_second": 4.764, | |
| "eval_steps_per_second": 1.191, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9296, | |
| "step": 130100 | |
| }, | |
| { | |
| "epoch": 10.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9386, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 10.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9402, | |
| "step": 130300 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9317, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9484, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 10.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9338, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 10.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9273, | |
| "step": 130700 | |
| }, | |
| { | |
| "epoch": 10.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9402, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 10.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9333, | |
| "step": 130900 | |
| }, | |
| { | |
| "epoch": 10.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9379, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9531, | |
| "step": 131100 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.933, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 10.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9423, | |
| "step": 131300 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9395, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9337, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9532, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9587, | |
| "step": 131700 | |
| }, | |
| { | |
| "epoch": 10.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9321, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 10.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9541, | |
| "step": 131900 | |
| }, | |
| { | |
| "epoch": 10.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.951, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 10.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9324, | |
| "step": 132100 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9521, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9568, | |
| "step": 132300 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9492, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 10.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9577, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9533, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9551, | |
| "step": 132700 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9283, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9644, | |
| "step": 132900 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.967, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 10.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.947, | |
| "step": 133100 | |
| }, | |
| { | |
| "epoch": 10.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9688, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 10.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9266, | |
| "step": 133300 | |
| }, | |
| { | |
| "epoch": 10.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9544, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9429, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9553, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9385, | |
| "step": 133700 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9545, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 10.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9577, | |
| "step": 133900 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9532, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 10.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9503, | |
| "step": 134100 | |
| }, | |
| { | |
| "epoch": 10.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9504, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 10.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9556, | |
| "step": 134300 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.963, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 10.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9663, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9645, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9531, | |
| "step": 134700 | |
| }, | |
| { | |
| "epoch": 10.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9566, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 10.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9443, | |
| "step": 134900 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9575, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 10.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9522, | |
| "step": 135100 | |
| }, | |
| { | |
| "epoch": 10.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9417, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 10.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9595, | |
| "step": 135300 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9746, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 10.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9531, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 10.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9666, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 10.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9508, | |
| "step": 135700 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9707, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9508, | |
| "step": 135900 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9637, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9641, | |
| "step": 136100 | |
| }, | |
| { | |
| "epoch": 10.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9605, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 10.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9672, | |
| "step": 136300 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9683, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9686, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9655, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 10.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9457, | |
| "step": 136700 | |
| }, | |
| { | |
| "epoch": 10.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9543, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 10.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9712, | |
| "step": 136900 | |
| }, | |
| { | |
| "epoch": 10.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9574, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9726, | |
| "step": 137100 | |
| }, | |
| { | |
| "epoch": 10.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9752, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9676, | |
| "step": 137300 | |
| }, | |
| { | |
| "epoch": 10.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9694, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 10.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9805, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 10.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9632, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9553, | |
| "step": 137700 | |
| }, | |
| { | |
| "epoch": 10.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9712, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 10.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9857, | |
| "step": 137900 | |
| }, | |
| { | |
| "epoch": 10.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9676, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9511, | |
| "step": 138100 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9619, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 10.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9716, | |
| "step": 138300 | |
| }, | |
| { | |
| "epoch": 10.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.966, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9637, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9726, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9753, | |
| "step": 138700 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9845, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9738, | |
| "step": 138900 | |
| }, | |
| { | |
| "epoch": 10.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9612, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 10.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9791, | |
| "step": 139100 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9772, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9645, | |
| "step": 139300 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9761, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.982, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 10.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9827, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9782, | |
| "step": 139700 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9825, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9786, | |
| "step": 139900 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9657, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "eval_gen_len": 18.765427739030635, | |
| "eval_loss": 1.6096522808074951, | |
| "eval_rouge1": 39.4889, | |
| "eval_rouge2": 17.9974, | |
| "eval_rougeL": 33.0222, | |
| "eval_rougeLsum": 33.0106, | |
| "eval_runtime": 2438.6189, | |
| "eval_samples_per_second": 4.645, | |
| "eval_steps_per_second": 1.161, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9702, | |
| "step": 140100 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9868, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9282, | |
| "step": 140300 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8992, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 11.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8785, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8643, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8896, | |
| "step": 140700 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8829, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8793, | |
| "step": 140900 | |
| }, | |
| { | |
| "epoch": 11.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8632, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8656, | |
| "step": 141100 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.887, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 11.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8705, | |
| "step": 141300 | |
| }, | |
| { | |
| "epoch": 11.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8785, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 11.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8813, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8884, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8907, | |
| "step": 141700 | |
| }, | |
| { | |
| "epoch": 11.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8836, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8849, | |
| "step": 141900 | |
| }, | |
| { | |
| "epoch": 11.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8802, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 11.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8824, | |
| "step": 142100 | |
| }, | |
| { | |
| "epoch": 11.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8882, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 11.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8845, | |
| "step": 142300 | |
| }, | |
| { | |
| "epoch": 11.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8946, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 11.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8921, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 11.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8864, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 11.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8896, | |
| "step": 142700 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8809, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9066, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9023, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 11.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9192, | |
| "step": 143100 | |
| }, | |
| { | |
| "epoch": 11.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9137, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 11.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8832, | |
| "step": 143300 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8887, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9082, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 11.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8935, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 11.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8923, | |
| "step": 143700 | |
| }, | |
| { | |
| "epoch": 11.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9177, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 11.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8995, | |
| "step": 143900 | |
| }, | |
| { | |
| "epoch": 11.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8996, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9046, | |
| "step": 144100 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9144, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 11.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8997, | |
| "step": 144300 | |
| }, | |
| { | |
| "epoch": 11.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8818, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8952, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 11.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9112, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8976, | |
| "step": 144700 | |
| }, | |
| { | |
| "epoch": 11.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8983, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 11.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9207, | |
| "step": 144900 | |
| }, | |
| { | |
| "epoch": 11.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9011, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 11.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9068, | |
| "step": 145100 | |
| }, | |
| { | |
| "epoch": 11.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8939, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9102, | |
| "step": 145300 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.914, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 11.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8868, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 11.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9171, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9036, | |
| "step": 145700 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9035, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 11.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.916, | |
| "step": 145900 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9135, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 11.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9106, | |
| "step": 146100 | |
| }, | |
| { | |
| "epoch": 11.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9052, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 11.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9017, | |
| "step": 146300 | |
| }, | |
| { | |
| "epoch": 11.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8923, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 11.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9145, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.917, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9145, | |
| "step": 146700 | |
| }, | |
| { | |
| "epoch": 11.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9204, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9049, | |
| "step": 146900 | |
| }, | |
| { | |
| "epoch": 11.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9188, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 11.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9211, | |
| "step": 147100 | |
| }, | |
| { | |
| "epoch": 11.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8907, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9142, | |
| "step": 147300 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9182, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 11.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9251, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 11.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9156, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 11.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9317, | |
| "step": 147700 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9117, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 11.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9183, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9069, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9165, | |
| "step": 148100 | |
| }, | |
| { | |
| "epoch": 11.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9078, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9342, | |
| "step": 148300 | |
| }, | |
| { | |
| "epoch": 11.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9206, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 11.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9127, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 11.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.921, | |
| "step": 148600 | |
| }, | |
| { | |
| "epoch": 11.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9207, | |
| "step": 148700 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9156, | |
| "step": 148800 | |
| }, | |
| { | |
| "epoch": 11.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9028, | |
| "step": 148900 | |
| }, | |
| { | |
| "epoch": 11.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9092, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 11.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9215, | |
| "step": 149100 | |
| }, | |
| { | |
| "epoch": 11.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9208, | |
| "step": 149200 | |
| }, | |
| { | |
| "epoch": 11.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9308, | |
| "step": 149300 | |
| }, | |
| { | |
| "epoch": 11.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9075, | |
| "step": 149400 | |
| }, | |
| { | |
| "epoch": 11.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9103, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 11.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9306, | |
| "step": 149600 | |
| }, | |
| { | |
| "epoch": 11.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.924, | |
| "step": 149700 | |
| }, | |
| { | |
| "epoch": 11.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9214, | |
| "step": 149800 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9252, | |
| "step": 149900 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9206, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "eval_gen_len": 18.793767105146994, | |
| "eval_loss": 1.6424586772918701, | |
| "eval_rouge1": 39.3295, | |
| "eval_rouge2": 17.9689, | |
| "eval_rougeL": 32.8956, | |
| "eval_rougeLsum": 32.889, | |
| "eval_runtime": 2509.3575, | |
| "eval_samples_per_second": 4.514, | |
| "eval_steps_per_second": 1.129, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 11.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9152, | |
| "step": 150100 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9187, | |
| "step": 150200 | |
| }, | |
| { | |
| "epoch": 11.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9092, | |
| "step": 150300 | |
| }, | |
| { | |
| "epoch": 11.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9096, | |
| "step": 150400 | |
| }, | |
| { | |
| "epoch": 11.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9297, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 11.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9272, | |
| "step": 150600 | |
| }, | |
| { | |
| "epoch": 11.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9477, | |
| "step": 150700 | |
| }, | |
| { | |
| "epoch": 11.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9237, | |
| "step": 150800 | |
| }, | |
| { | |
| "epoch": 11.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9298, | |
| "step": 150900 | |
| }, | |
| { | |
| "epoch": 11.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9283, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9449, | |
| "step": 151100 | |
| }, | |
| { | |
| "epoch": 11.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9287, | |
| "step": 151200 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9181, | |
| "step": 151300 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9318, | |
| "step": 151400 | |
| }, | |
| { | |
| "epoch": 11.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9127, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 11.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9318, | |
| "step": 151600 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9263, | |
| "step": 151700 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9319, | |
| "step": 151800 | |
| }, | |
| { | |
| "epoch": 11.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9412, | |
| "step": 151900 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9246, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 11.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.935, | |
| "step": 152100 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9311, | |
| "step": 152200 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9228, | |
| "step": 152300 | |
| }, | |
| { | |
| "epoch": 11.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9253, | |
| "step": 152400 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9347, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 11.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9357, | |
| "step": 152600 | |
| }, | |
| { | |
| "epoch": 11.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9402, | |
| "step": 152700 | |
| }, | |
| { | |
| "epoch": 11.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9422, | |
| "step": 152800 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9344, | |
| "step": 152900 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9344, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 12.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8553, | |
| "step": 153100 | |
| }, | |
| { | |
| "epoch": 12.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8309, | |
| "step": 153200 | |
| }, | |
| { | |
| "epoch": 12.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8392, | |
| "step": 153300 | |
| }, | |
| { | |
| "epoch": 12.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.839, | |
| "step": 153400 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.832, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8413, | |
| "step": 153600 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8306, | |
| "step": 153700 | |
| }, | |
| { | |
| "epoch": 12.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8318, | |
| "step": 153800 | |
| }, | |
| { | |
| "epoch": 12.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8422, | |
| "step": 153900 | |
| }, | |
| { | |
| "epoch": 12.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8423, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8485, | |
| "step": 154100 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8339, | |
| "step": 154200 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8444, | |
| "step": 154300 | |
| }, | |
| { | |
| "epoch": 12.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8435, | |
| "step": 154400 | |
| }, | |
| { | |
| "epoch": 12.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8419, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 12.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8566, | |
| "step": 154600 | |
| }, | |
| { | |
| "epoch": 12.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8516, | |
| "step": 154700 | |
| }, | |
| { | |
| "epoch": 12.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8737, | |
| "step": 154800 | |
| }, | |
| { | |
| "epoch": 12.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8527, | |
| "step": 154900 | |
| }, | |
| { | |
| "epoch": 12.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8388, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 12.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8528, | |
| "step": 155100 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8548, | |
| "step": 155200 | |
| }, | |
| { | |
| "epoch": 12.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.866, | |
| "step": 155300 | |
| }, | |
| { | |
| "epoch": 12.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8604, | |
| "step": 155400 | |
| }, | |
| { | |
| "epoch": 12.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8596, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 12.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8483, | |
| "step": 155600 | |
| }, | |
| { | |
| "epoch": 12.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8523, | |
| "step": 155700 | |
| }, | |
| { | |
| "epoch": 12.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8588, | |
| "step": 155800 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8571, | |
| "step": 155900 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8554, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8586, | |
| "step": 156100 | |
| }, | |
| { | |
| "epoch": 12.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8577, | |
| "step": 156200 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8751, | |
| "step": 156300 | |
| }, | |
| { | |
| "epoch": 12.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8783, | |
| "step": 156400 | |
| }, | |
| { | |
| "epoch": 12.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8635, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 12.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8546, | |
| "step": 156600 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8548, | |
| "step": 156700 | |
| }, | |
| { | |
| "epoch": 12.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8657, | |
| "step": 156800 | |
| }, | |
| { | |
| "epoch": 12.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8578, | |
| "step": 156900 | |
| }, | |
| { | |
| "epoch": 12.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8626, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.869, | |
| "step": 157100 | |
| }, | |
| { | |
| "epoch": 12.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.864, | |
| "step": 157200 | |
| }, | |
| { | |
| "epoch": 12.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8677, | |
| "step": 157300 | |
| }, | |
| { | |
| "epoch": 12.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8673, | |
| "step": 157400 | |
| }, | |
| { | |
| "epoch": 12.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8685, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 12.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8777, | |
| "step": 157600 | |
| }, | |
| { | |
| "epoch": 12.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8707, | |
| "step": 157700 | |
| }, | |
| { | |
| "epoch": 12.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8495, | |
| "step": 157800 | |
| }, | |
| { | |
| "epoch": 12.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8891, | |
| "step": 157900 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8652, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 12.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8688, | |
| "step": 158100 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8519, | |
| "step": 158200 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8686, | |
| "step": 158300 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.859, | |
| "step": 158400 | |
| }, | |
| { | |
| "epoch": 12.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8559, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8713, | |
| "step": 158600 | |
| }, | |
| { | |
| "epoch": 12.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8868, | |
| "step": 158700 | |
| }, | |
| { | |
| "epoch": 12.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8699, | |
| "step": 158800 | |
| }, | |
| { | |
| "epoch": 12.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8768, | |
| "step": 158900 | |
| }, | |
| { | |
| "epoch": 12.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8746, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.873, | |
| "step": 159100 | |
| }, | |
| { | |
| "epoch": 12.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8646, | |
| "step": 159200 | |
| }, | |
| { | |
| "epoch": 12.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.86, | |
| "step": 159300 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8631, | |
| "step": 159400 | |
| }, | |
| { | |
| "epoch": 12.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8907, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 12.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8596, | |
| "step": 159600 | |
| }, | |
| { | |
| "epoch": 12.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8781, | |
| "step": 159700 | |
| }, | |
| { | |
| "epoch": 12.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8728, | |
| "step": 159800 | |
| }, | |
| { | |
| "epoch": 12.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8767, | |
| "step": 159900 | |
| }, | |
| { | |
| "epoch": 12.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8635, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 12.55, | |
| "eval_gen_len": 18.793943674406286, | |
| "eval_loss": 1.6820098161697388, | |
| "eval_rouge1": 39.4633, | |
| "eval_rouge2": 18.0301, | |
| "eval_rougeL": 32.9134, | |
| "eval_rougeLsum": 32.919, | |
| "eval_runtime": 2390.8284, | |
| "eval_samples_per_second": 4.738, | |
| "eval_steps_per_second": 1.185, | |
| "step": 160000 | |
| } | |
| ], | |
| "max_steps": 637550, | |
| "num_train_epochs": 50, | |
| "total_flos": 8.741750241043968e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |