{ "rl_info/A2G": -0.006648760288953781, "rl_info/entropy": 3.1247427463531494, "rl_info/total_token": 1710.0, "rl_info/advantage_b4_norm": -524.6408081054688, "rl_info/advantage_after_gnorm": 1.010803461074829, "rl_info/kl_w_ref": 0.0, "train/rl_loss": 0.6645635962486267, "train/lm_loss": 6.4453349113464355, "train/total_loss": 7.109898567199707, "samsum/rouge1": 0.07018707411132723, "samsum/rouge2": 0.015589278392518668, "samsum/rougeL": 0.05694806401174433, "samsum/rougeLsum": 0.05097452753746131, "samsum/bertscore_precision": 0.6306706219911575, "samsum/bertscore_recall": 0.7116748541593552, "samsum/bertscore_f1": 0.6681777884562811, "xsum/rouge1": 0.07577425197474222, "xsum/rouge2": 0.015173697537833916, "xsum/rougeL": 0.06791431460162027, "xsum/rougeLsum": 0.06226845688932118, "xsum/bertscore_precision": 0.685419961810112, "xsum/bertscore_recall": 0.6998258779446284, "xsum/bertscore_f1": 0.6894047458966573, "gigaword/rouge1": 0.01673533809218624, "gigaword/rouge2": 0.002688706117978111, "gigaword/rougeL": 0.016448820149720444, "gigaword/rougeLsum": 0.015425704010653109, "gigaword/bertscore_precision": 0.5675247123837471, "gigaword/bertscore_recall": 0.6473609921336174, "gigaword/bertscore_f1": 0.6041367189586162, "cnndm/rouge1": 0.12628644770575717, "cnndm/rouge2": 0.04433825429137848, "cnndm/rougeL": 0.09875398462560836, "cnndm/rougeLsum": 0.10678645281701828, "cnndm/bertscore_precision": 0.6603378802537918, "cnndm/bertscore_recall": 0.7300801773866018, "cnndm/bertscore_f1": 0.6927102555831274, "eval_agg/avg_all_rougef": 0.052643335804179334, "eval_agg/avg_all_bertf": 0.6636073772236705, "eval_agg/avg_all": 0.3581253565139249, "num_rl_rollout": 4, "lm_epoch": 0, "rl_epoch": 0, "step": 400, "total_data_token": 668982, "total_rl_token": 849448, "total_lm_token": 625452, "total_token": 1474900, "completed_steps": 400, "tune_objective": 0.784820971275843 }