| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 15630, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.880038387715931e-05, | |
| "loss": 1.836, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.8971, | |
| "eval_gen_len": 19.974545454545453, | |
| "eval_loss": 1.5560153722763062, | |
| "eval_precision": 0.9105, | |
| "eval_recall": 0.8843, | |
| "eval_rouge1": 0.4155, | |
| "eval_rouge2": 0.2028, | |
| "eval_rougeL": 0.3561, | |
| "eval_rougeLsum": 0.3559, | |
| "eval_runtime": 315.2437, | |
| "eval_samples_per_second": 8.723, | |
| "eval_steps_per_second": 0.546, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.760076775431862e-05, | |
| "loss": 1.5951, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.8997, | |
| "eval_gen_len": 19.93527272727273, | |
| "eval_loss": 1.5003960132598877, | |
| "eval_precision": 0.9115, | |
| "eval_recall": 0.8886, | |
| "eval_rouge1": 0.4333, | |
| "eval_rouge2": 0.2136, | |
| "eval_rougeL": 0.3695, | |
| "eval_rougeLsum": 0.3694, | |
| "eval_runtime": 311.8452, | |
| "eval_samples_per_second": 8.818, | |
| "eval_steps_per_second": 0.552, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.6401151631477927e-05, | |
| "loss": 1.469, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.9001, | |
| "eval_gen_len": 19.938545454545455, | |
| "eval_loss": 1.4690784215927124, | |
| "eval_precision": 0.912, | |
| "eval_recall": 0.8888, | |
| "eval_rouge1": 0.4355, | |
| "eval_rouge2": 0.2176, | |
| "eval_rougeL": 0.3729, | |
| "eval_rougeLsum": 0.3728, | |
| "eval_runtime": 312.4642, | |
| "eval_samples_per_second": 8.801, | |
| "eval_steps_per_second": 0.55, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 1.5201535508637238e-05, | |
| "loss": 1.373, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.9003, | |
| "eval_gen_len": 19.964727272727274, | |
| "eval_loss": 1.4657667875289917, | |
| "eval_precision": 0.9137, | |
| "eval_recall": 0.8877, | |
| "eval_rouge1": 0.4311, | |
| "eval_rouge2": 0.2164, | |
| "eval_rougeL": 0.3706, | |
| "eval_rougeLsum": 0.3704, | |
| "eval_runtime": 313.2326, | |
| "eval_samples_per_second": 8.779, | |
| "eval_steps_per_second": 0.549, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.4001919385796546e-05, | |
| "loss": 1.2902, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.9008, | |
| "eval_gen_len": 19.94981818181818, | |
| "eval_loss": 1.4541645050048828, | |
| "eval_precision": 0.9136, | |
| "eval_recall": 0.8887, | |
| "eval_rouge1": 0.4368, | |
| "eval_rouge2": 0.2218, | |
| "eval_rougeL": 0.3762, | |
| "eval_rougeLsum": 0.376, | |
| "eval_runtime": 313.1455, | |
| "eval_samples_per_second": 8.782, | |
| "eval_steps_per_second": 0.549, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 1.2802303262955855e-05, | |
| "loss": 1.222, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.9018, | |
| "eval_gen_len": 19.942545454545453, | |
| "eval_loss": 1.458353042602539, | |
| "eval_precision": 0.914, | |
| "eval_recall": 0.8902, | |
| "eval_rouge1": 0.4407, | |
| "eval_rouge2": 0.223, | |
| "eval_rougeL": 0.3802, | |
| "eval_rougeLsum": 0.3798, | |
| "eval_runtime": 312.4439, | |
| "eval_samples_per_second": 8.802, | |
| "eval_steps_per_second": 0.55, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 1.1602687140115163e-05, | |
| "loss": 1.1655, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.9019, | |
| "eval_gen_len": 19.932727272727274, | |
| "eval_loss": 1.4708688259124756, | |
| "eval_precision": 0.9145, | |
| "eval_recall": 0.89, | |
| "eval_rouge1": 0.4404, | |
| "eval_rouge2": 0.2246, | |
| "eval_rougeL": 0.3806, | |
| "eval_rougeLsum": 0.3803, | |
| "eval_runtime": 313.9664, | |
| "eval_samples_per_second": 8.759, | |
| "eval_steps_per_second": 0.548, | |
| "step": 3647 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 1.0403071017274472e-05, | |
| "loss": 1.11, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.9026, | |
| "eval_gen_len": 19.908363636363635, | |
| "eval_loss": 1.47238028049469, | |
| "eval_precision": 0.9153, | |
| "eval_recall": 0.8906, | |
| "eval_rouge1": 0.4435, | |
| "eval_rouge2": 0.2269, | |
| "eval_rougeL": 0.383, | |
| "eval_rougeLsum": 0.3828, | |
| "eval_runtime": 312.3634, | |
| "eval_samples_per_second": 8.804, | |
| "eval_steps_per_second": 0.551, | |
| "step": 4168 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 9.203454894433782e-06, | |
| "loss": 1.0629, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.9028, | |
| "eval_gen_len": 19.928, | |
| "eval_loss": 1.485286831855774, | |
| "eval_precision": 0.9155, | |
| "eval_recall": 0.8908, | |
| "eval_rouge1": 0.4431, | |
| "eval_rouge2": 0.2273, | |
| "eval_rougeL": 0.3832, | |
| "eval_rougeLsum": 0.383, | |
| "eval_runtime": 312.2978, | |
| "eval_samples_per_second": 8.806, | |
| "eval_steps_per_second": 0.551, | |
| "step": 4689 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 8.003838771593091e-06, | |
| "loss": 1.023, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.9021, | |
| "eval_gen_len": 19.944, | |
| "eval_loss": 1.503290057182312, | |
| "eval_precision": 0.9152, | |
| "eval_recall": 0.8897, | |
| "eval_rouge1": 0.4409, | |
| "eval_rouge2": 0.2247, | |
| "eval_rougeL": 0.3819, | |
| "eval_rougeLsum": 0.3818, | |
| "eval_runtime": 312.2524, | |
| "eval_samples_per_second": 8.807, | |
| "eval_steps_per_second": 0.551, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 6.8042226487524e-06, | |
| "loss": 0.9862, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.9034, | |
| "eval_gen_len": 19.912363636363636, | |
| "eval_loss": 1.5074084997177124, | |
| "eval_precision": 0.9158, | |
| "eval_recall": 0.8916, | |
| "eval_rouge1": 0.4479, | |
| "eval_rouge2": 0.2278, | |
| "eval_rougeL": 0.3862, | |
| "eval_rougeLsum": 0.386, | |
| "eval_runtime": 313.5934, | |
| "eval_samples_per_second": 8.769, | |
| "eval_steps_per_second": 0.548, | |
| "step": 5731 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "learning_rate": 5.6046065259117085e-06, | |
| "loss": 0.957, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.903, | |
| "eval_gen_len": 19.90327272727273, | |
| "eval_loss": 1.518417239189148, | |
| "eval_precision": 0.9159, | |
| "eval_recall": 0.8909, | |
| "eval_rouge1": 0.4461, | |
| "eval_rouge2": 0.2264, | |
| "eval_rougeL": 0.3846, | |
| "eval_rougeLsum": 0.3847, | |
| "eval_runtime": 314.9612, | |
| "eval_samples_per_second": 8.731, | |
| "eval_steps_per_second": 0.546, | |
| "step": 6252 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "learning_rate": 4.404990403071018e-06, | |
| "loss": 0.9315, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.9031, | |
| "eval_gen_len": 19.908363636363635, | |
| "eval_loss": 1.5269190073013306, | |
| "eval_precision": 0.9156, | |
| "eval_recall": 0.8912, | |
| "eval_rouge1": 0.4473, | |
| "eval_rouge2": 0.2284, | |
| "eval_rougeL": 0.386, | |
| "eval_rougeLsum": 0.3858, | |
| "eval_runtime": 311.2352, | |
| "eval_samples_per_second": 8.836, | |
| "eval_steps_per_second": 0.553, | |
| "step": 6773 | |
| }, | |
| { | |
| "epoch": 13.44, | |
| "learning_rate": 3.2053742802303266e-06, | |
| "loss": 0.9093, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.9029, | |
| "eval_gen_len": 19.913454545454545, | |
| "eval_loss": 1.5310986042022705, | |
| "eval_precision": 0.9155, | |
| "eval_recall": 0.8909, | |
| "eval_rouge1": 0.4453, | |
| "eval_rouge2": 0.2273, | |
| "eval_rougeL": 0.3846, | |
| "eval_rougeLsum": 0.3843, | |
| "eval_runtime": 313.2169, | |
| "eval_samples_per_second": 8.78, | |
| "eval_steps_per_second": 0.549, | |
| "step": 7294 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "learning_rate": 2.0057581573896352e-06, | |
| "loss": 0.8927, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.9029, | |
| "eval_gen_len": 19.906545454545455, | |
| "eval_loss": 1.5351076126098633, | |
| "eval_precision": 0.9156, | |
| "eval_recall": 0.8909, | |
| "eval_rouge1": 0.4457, | |
| "eval_rouge2": 0.2267, | |
| "eval_rougeL": 0.3842, | |
| "eval_rougeLsum": 0.384, | |
| "eval_runtime": 314.8443, | |
| "eval_samples_per_second": 8.734, | |
| "eval_steps_per_second": 0.546, | |
| "step": 7815 | |
| }, | |
| { | |
| "epoch": 15.36, | |
| "learning_rate": 8.061420345489445e-07, | |
| "loss": 0.8773, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_f1": 0.9025, | |
| "eval_gen_len": 19.942545454545453, | |
| "eval_loss": 1.5439822673797607, | |
| "eval_precision": 0.9151, | |
| "eval_recall": 0.8905, | |
| "eval_rouge1": 0.4427, | |
| "eval_rouge2": 0.225, | |
| "eval_rougeL": 0.382, | |
| "eval_rougeLsum": 0.382, | |
| "eval_runtime": 314.8749, | |
| "eval_samples_per_second": 8.734, | |
| "eval_steps_per_second": 0.546, | |
| "step": 8336 | |
| }, | |
| { | |
| "epoch": 16.31, | |
| "learning_rate": 6.404350607805503e-06, | |
| "loss": 0.8806, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_f1": 0.9036, | |
| "eval_gen_len": 19.88509090909091, | |
| "eval_loss": 1.5509530305862427, | |
| "eval_precision": 0.9159, | |
| "eval_recall": 0.8919, | |
| "eval_rouge1": 0.4495, | |
| "eval_rouge2": 0.2279, | |
| "eval_rougeL": 0.3868, | |
| "eval_rougeLsum": 0.3869, | |
| "eval_runtime": 312.7951, | |
| "eval_samples_per_second": 8.792, | |
| "eval_steps_per_second": 0.55, | |
| "step": 8857 | |
| }, | |
| { | |
| "epoch": 17.27, | |
| "learning_rate": 5.6046065259117085e-06, | |
| "loss": 0.8683, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_f1": 0.9038, | |
| "eval_gen_len": 19.88290909090909, | |
| "eval_loss": 1.56792151927948, | |
| "eval_precision": 0.9161, | |
| "eval_recall": 0.8921, | |
| "eval_rouge1": 0.4473, | |
| "eval_rouge2": 0.2282, | |
| "eval_rougeL": 0.3856, | |
| "eval_rougeLsum": 0.3857, | |
| "eval_runtime": 314.8371, | |
| "eval_samples_per_second": 8.735, | |
| "eval_steps_per_second": 0.546, | |
| "step": 9378 | |
| }, | |
| { | |
| "epoch": 18.23, | |
| "learning_rate": 4.804862444017915e-06, | |
| "loss": 0.8413, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_f1": 0.9035, | |
| "eval_gen_len": 19.913454545454545, | |
| "eval_loss": 1.574545979499817, | |
| "eval_precision": 0.9159, | |
| "eval_recall": 0.8918, | |
| "eval_rouge1": 0.4492, | |
| "eval_rouge2": 0.2282, | |
| "eval_rougeL": 0.3861, | |
| "eval_rougeLsum": 0.3864, | |
| "eval_runtime": 311.5846, | |
| "eval_samples_per_second": 8.826, | |
| "eval_steps_per_second": 0.552, | |
| "step": 9899 | |
| }, | |
| { | |
| "epoch": 19.19, | |
| "learning_rate": 4.005118362124121e-06, | |
| "loss": 0.8257, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_f1": 0.9031, | |
| "eval_gen_len": 19.899636363636365, | |
| "eval_loss": 1.583512544631958, | |
| "eval_precision": 0.9153, | |
| "eval_recall": 0.8915, | |
| "eval_rouge1": 0.4471, | |
| "eval_rouge2": 0.2266, | |
| "eval_rougeL": 0.3852, | |
| "eval_rougeLsum": 0.3853, | |
| "eval_runtime": 311.7771, | |
| "eval_samples_per_second": 8.82, | |
| "eval_steps_per_second": 0.552, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 20.15, | |
| "learning_rate": 3.2053742802303266e-06, | |
| "loss": 0.8097, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_f1": 0.9034, | |
| "eval_gen_len": 19.907272727272726, | |
| "eval_loss": 1.59569251537323, | |
| "eval_precision": 0.9156, | |
| "eval_recall": 0.8919, | |
| "eval_rouge1": 0.4472, | |
| "eval_rouge2": 0.2271, | |
| "eval_rougeL": 0.3856, | |
| "eval_rougeLsum": 0.3856, | |
| "eval_runtime": 309.5923, | |
| "eval_samples_per_second": 8.883, | |
| "eval_steps_per_second": 0.556, | |
| "step": 10941 | |
| }, | |
| { | |
| "epoch": 21.11, | |
| "learning_rate": 2.4056301983365325e-06, | |
| "loss": 0.7926, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_f1": 0.9034, | |
| "eval_gen_len": 19.892, | |
| "eval_loss": 1.595582127571106, | |
| "eval_precision": 0.9159, | |
| "eval_recall": 0.8916, | |
| "eval_rouge1": 0.4479, | |
| "eval_rouge2": 0.2282, | |
| "eval_rougeL": 0.3855, | |
| "eval_rougeLsum": 0.3857, | |
| "eval_runtime": 311.5772, | |
| "eval_samples_per_second": 8.826, | |
| "eval_steps_per_second": 0.552, | |
| "step": 11462 | |
| }, | |
| { | |
| "epoch": 22.07, | |
| "learning_rate": 1.6058861164427384e-06, | |
| "loss": 0.7841, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_f1": 0.9028, | |
| "eval_gen_len": 19.912, | |
| "eval_loss": 1.5990447998046875, | |
| "eval_precision": 0.9155, | |
| "eval_recall": 0.8908, | |
| "eval_rouge1": 0.4444, | |
| "eval_rouge2": 0.2261, | |
| "eval_rougeL": 0.3833, | |
| "eval_rougeLsum": 0.3834, | |
| "eval_runtime": 311.6057, | |
| "eval_samples_per_second": 8.825, | |
| "eval_steps_per_second": 0.552, | |
| "step": 11983 | |
| }, | |
| { | |
| "epoch": 23.03, | |
| "learning_rate": 8.061420345489445e-07, | |
| "loss": 0.7734, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 23.99, | |
| "learning_rate": 6.397952655150352e-09, | |
| "loss": 0.7669, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_f1": 0.9037, | |
| "eval_gen_len": 19.900727272727273, | |
| "eval_loss": 1.6096539497375488, | |
| "eval_precision": 0.9162, | |
| "eval_recall": 0.892, | |
| "eval_rouge1": 0.4491, | |
| "eval_rouge2": 0.2284, | |
| "eval_rougeL": 0.3872, | |
| "eval_rougeLsum": 0.387, | |
| "eval_runtime": 312.9411, | |
| "eval_samples_per_second": 8.788, | |
| "eval_steps_per_second": 0.55, | |
| "step": 12504 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "learning_rate": 3.3653230966090854e-06, | |
| "loss": 0.7733, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_f1": 0.9027, | |
| "eval_gen_len": 19.91781818181818, | |
| "eval_loss": 1.6059536933898926, | |
| "eval_precision": 0.9154, | |
| "eval_recall": 0.8906, | |
| "eval_rouge1": 0.4442, | |
| "eval_rouge2": 0.2257, | |
| "eval_rougeL": 0.3827, | |
| "eval_rougeLsum": 0.3828, | |
| "eval_runtime": 312.8395, | |
| "eval_samples_per_second": 8.79, | |
| "eval_steps_per_second": 0.55, | |
| "step": 13025 | |
| }, | |
| { | |
| "epoch": 25.91, | |
| "learning_rate": 2.72552783109405e-06, | |
| "loss": 0.7631, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_f1": 0.9031, | |
| "eval_gen_len": 19.917454545454547, | |
| "eval_loss": 1.618681788444519, | |
| "eval_precision": 0.9154, | |
| "eval_recall": 0.8915, | |
| "eval_rouge1": 0.4472, | |
| "eval_rouge2": 0.2276, | |
| "eval_rougeL": 0.3861, | |
| "eval_rougeLsum": 0.3861, | |
| "eval_runtime": 312.5385, | |
| "eval_samples_per_second": 8.799, | |
| "eval_steps_per_second": 0.55, | |
| "step": 13546 | |
| }, | |
| { | |
| "epoch": 26.87, | |
| "learning_rate": 2.085732565579015e-06, | |
| "loss": 0.7505, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_f1": 0.9031, | |
| "eval_gen_len": 19.896727272727272, | |
| "eval_loss": 1.620802402496338, | |
| "eval_precision": 0.9155, | |
| "eval_recall": 0.8914, | |
| "eval_rouge1": 0.4463, | |
| "eval_rouge2": 0.227, | |
| "eval_rougeL": 0.3852, | |
| "eval_rougeLsum": 0.3851, | |
| "eval_runtime": 310.9302, | |
| "eval_samples_per_second": 8.844, | |
| "eval_steps_per_second": 0.553, | |
| "step": 14067 | |
| }, | |
| { | |
| "epoch": 27.83, | |
| "learning_rate": 1.4459373000639796e-06, | |
| "loss": 0.7413, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_f1": 0.9032, | |
| "eval_gen_len": 19.91527272727273, | |
| "eval_loss": 1.623663306236267, | |
| "eval_precision": 0.9159, | |
| "eval_recall": 0.8912, | |
| "eval_rouge1": 0.4468, | |
| "eval_rouge2": 0.2273, | |
| "eval_rougeL": 0.3854, | |
| "eval_rougeLsum": 0.3853, | |
| "eval_runtime": 311.7716, | |
| "eval_samples_per_second": 8.821, | |
| "eval_steps_per_second": 0.552, | |
| "step": 14588 | |
| }, | |
| { | |
| "epoch": 28.79, | |
| "learning_rate": 8.061420345489445e-07, | |
| "loss": 0.7348, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_f1": 0.9035, | |
| "eval_gen_len": 19.893818181818183, | |
| "eval_loss": 1.6312142610549927, | |
| "eval_precision": 0.9158, | |
| "eval_recall": 0.8918, | |
| "eval_rouge1": 0.4482, | |
| "eval_rouge2": 0.2268, | |
| "eval_rougeL": 0.3858, | |
| "eval_rougeLsum": 0.3858, | |
| "eval_runtime": 310.3349, | |
| "eval_samples_per_second": 8.861, | |
| "eval_steps_per_second": 0.554, | |
| "step": 15109 | |
| }, | |
| { | |
| "epoch": 29.75, | |
| "learning_rate": 1.6634676903390917e-07, | |
| "loss": 0.7286, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_f1": 0.9033, | |
| "eval_gen_len": 19.908727272727273, | |
| "eval_loss": 1.6350260972976685, | |
| "eval_precision": 0.9156, | |
| "eval_recall": 0.8915, | |
| "eval_rouge1": 0.4471, | |
| "eval_rouge2": 0.2259, | |
| "eval_rougeL": 0.3846, | |
| "eval_rougeLsum": 0.3845, | |
| "eval_runtime": 314.8086, | |
| "eval_samples_per_second": 8.735, | |
| "eval_steps_per_second": 0.546, | |
| "step": 15630 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 15630, | |
| "total_flos": 3.2113882736270377e+18, | |
| "train_loss": 0.1497309269236969, | |
| "train_runtime": 15515.0349, | |
| "train_samples_per_second": 96.68, | |
| "train_steps_per_second": 1.007 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 15630, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "total_flos": 3.2113882736270377e+18, | |
| "train_batch_size": 24, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |