{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.9332700393327005,
  "global_step": 29000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 8.440677966101696e-05,
      "loss": 2.551,
      "step": 500
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00016915254237288136,
      "loss": 2.541,
      "step": 1000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002538983050847458,
      "loss": 2.5194,
      "step": 1500
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00033864406779661016,
      "loss": 2.4769,
      "step": 2000
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0004233898305084746,
      "loss": 2.46,
      "step": 2500
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0004990957727375481,
      "loss": 2.403,
      "step": 3000
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0004897144148896089,
      "loss": 2.3829,
      "step": 3500
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0004802953809057343,
      "loss": 2.3175,
      "step": 4000
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0004708763469218597,
      "loss": 2.3087,
      "step": 4500
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0004614573129379851,
      "loss": 2.2763,
      "step": 5000
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0004520382789541105,
      "loss": 2.2438,
      "step": 5500
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0004426192449702359,
      "loss": 2.2358,
      "step": 6000
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.000433219049054329,
      "loss": 2.194,
      "step": 6500
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0004238000150704544,
      "loss": 2.1761,
      "step": 7000
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 16.7426,
      "eval_loss": 2.006333589553833,
      "eval_rouge1": 0.3422,
      "eval_rouge2": 0.1605,
      "eval_rougeL": 0.3139,
      "eval_rougeLsum": 0.314,
      "eval_runtime": 782.1975,
      "eval_samples_per_second": 16.757,
      "eval_steps_per_second": 2.095,
      "step": 7373
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0004143809810865798,
      "loss": 2.1455,
      "step": 7500
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0004049619471027052,
      "loss": 2.0777,
      "step": 8000
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.0003955429131188306,
      "loss": 2.0945,
      "step": 8500
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.00038612387913495596,
      "loss": 2.0485,
      "step": 9000
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.00037670484515108134,
      "loss": 2.054,
      "step": 9500
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.0003672858111672067,
      "loss": 2.073,
      "step": 10000
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.0003578667771833321,
      "loss": 2.0396,
      "step": 10500
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.0003484665812674252,
      "loss": 2.0387,
      "step": 11000
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.0003390475472835506,
      "loss": 2.0198,
      "step": 11500
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.000329628513299676,
      "loss": 2.0144,
      "step": 12000
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.00032020947931580136,
      "loss": 2.0256,
      "step": 12500
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.0003108092833998945,
      "loss": 2.0027,
      "step": 13000
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.0003013902494160199,
      "loss": 2.0005,
      "step": 13500
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.0002919712154321453,
      "loss": 1.9977,
      "step": 14000
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.0002825521814482707,
      "loss": 1.9967,
      "step": 14500
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 16.758,
      "eval_loss": 1.8912419080734253,
      "eval_rouge1": 0.3607,
      "eval_rouge2": 0.1759,
      "eval_rougeL": 0.3322,
      "eval_rougeLsum": 0.3321,
      "eval_runtime": 782.0201,
      "eval_samples_per_second": 16.76,
      "eval_steps_per_second": 2.096,
      "step": 14746
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.0002731519855323638,
      "loss": 1.9494,
      "step": 15000
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.0002637329515484892,
      "loss": 1.8612,
      "step": 15500
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.0002543139175646146,
      "loss": 1.9076,
      "step": 16000
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.00024489488358074,
      "loss": 1.8987,
      "step": 16500
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00023547584959686534,
      "loss": 1.8882,
      "step": 17000
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.00022605681561299075,
      "loss": 1.9032,
      "step": 17500
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.00021663778162911613,
      "loss": 1.8853,
      "step": 18000
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.00020721874764524151,
      "loss": 1.8841,
      "step": 18500
    },
    {
      "epoch": 2.58,
      "learning_rate": 0.00019781855172933463,
      "loss": 1.8747,
      "step": 19000
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.00018841835581342778,
      "loss": 1.8842,
      "step": 19500
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.00017899932182955316,
      "loss": 1.8856,
      "step": 20000
    },
    {
      "epoch": 2.78,
      "learning_rate": 0.00016958028784567857,
      "loss": 1.8751,
      "step": 20500
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.00016016125386180395,
      "loss": 1.8806,
      "step": 21000
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.00015074221987792933,
      "loss": 1.8519,
      "step": 21500
    },
    {
      "epoch": 2.98,
      "learning_rate": 0.00014134202396202245,
      "loss": 1.8716,
      "step": 22000
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 16.6685,
      "eval_loss": 1.8340635299682617,
      "eval_rouge1": 0.3656,
      "eval_rouge2": 0.1782,
      "eval_rougeL": 0.3354,
      "eval_rougeLsum": 0.3355,
      "eval_runtime": 781.3209,
      "eval_samples_per_second": 16.775,
      "eval_steps_per_second": 2.098,
      "step": 22119
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.00013192298997814783,
      "loss": 1.8213,
      "step": 22500
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.00012250395599427324,
      "loss": 1.78,
      "step": 23000
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.00011308492201039862,
      "loss": 1.7891,
      "step": 23500
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.00010366588802652401,
      "loss": 1.7992,
      "step": 24000
    },
    {
      "epoch": 3.32,
      "learning_rate": 9.424685404264939e-05,
      "loss": 1.8033,
      "step": 24500
    },
    {
      "epoch": 3.39,
      "learning_rate": 8.482782005877477e-05,
      "loss": 1.8072,
      "step": 25000
    },
    {
      "epoch": 3.46,
      "learning_rate": 7.540878607490017e-05,
      "loss": 1.7981,
      "step": 25500
    },
    {
      "epoch": 3.53,
      "learning_rate": 6.60085901589933e-05,
      "loss": 1.7978,
      "step": 26000
    },
    {
      "epoch": 3.59,
      "learning_rate": 5.6589556175118686e-05,
      "loss": 1.7927,
      "step": 26500
    },
    {
      "epoch": 3.66,
      "learning_rate": 4.7170522191244066e-05,
      "loss": 1.7735,
      "step": 27000
    },
    {
      "epoch": 3.73,
      "learning_rate": 3.7751488207369454e-05,
      "loss": 1.7863,
      "step": 27500
    },
    {
      "epoch": 3.8,
      "learning_rate": 2.8332454223494838e-05,
      "loss": 1.7965,
      "step": 28000
    },
    {
      "epoch": 3.87,
      "learning_rate": 1.8932258307587973e-05,
      "loss": 1.7722,
      "step": 28500
    },
    {
      "epoch": 3.93,
      "learning_rate": 9.513224323713361e-06,
      "loss": 1.7679,
      "step": 29000
    }
  ],
  "max_steps": 29492,
  "num_train_epochs": 4,
  "total_flos": 1.1856217832207155e+17,
  "trial_name": null,
  "trial_params": null
}