diff --git "a/ray_tune_logs/result.json" "b/ray_tune_logs/result.json" --- "a/ray_tune_logs/result.json" +++ "b/ray_tune_logs/result.json" @@ -1,8 +1,14 @@ -{"gigaword/rouge1": 0.016009562228265557, "gigaword/rouge2": 0.003056022795887401, "gigaword/rougeL": 0.015873002170997616, "gigaword/rougeLsum": 0.014966453674489327, "gigaword/bertscore_precision": 0.5707311329245567, "gigaword/bertscore_recall": 0.652623221874237, "gigaword/bertscore_f1": 0.6081619501113892, "cnndm/rouge1": 0.10217621552932292, "cnndm/rouge2": 0.028340214561100655, "cnndm/rougeL": 0.07993005276205918, "cnndm/rougeLsum": 0.08767832075358839, "cnndm/bertscore_precision": 0.6678036600351334, "cnndm/bertscore_recall": 0.7234405279159546, "cnndm/bertscore_f1": 0.693790078163147, "xsum/rouge1": 0.12240485954893322, "xsum/rouge2": 0.020703411244533874, "xsum/rougeL": 0.10276211214052106, "xsum/rougeLsum": 0.09471663512544302, "xsum/bertscore_precision": 0.7185406585534414, "xsum/bertscore_recall": 0.7206180840730667, "xsum/bertscore_f1": 0.716593990723292, "samsum/rouge1": 0.0614302960358649, "samsum/rouge2": 0.01363016003924649, "samsum/rougeL": 0.05405409512133013, "samsum/rougeLsum": 0.04035386836817009, "samsum/bertscore_precision": 0.6351983745892843, "samsum/bertscore_recall": 0.6781209856271744, "samsum/bertscore_f1": 0.6556738962729772, "eval_agg/avg_all_rougef": 0.05363033013123461, "eval_agg/avg_all_bertf": 0.6685549788177013, "eval_agg/avg_all": 0.361092654474468, "num_rl_rollout": 0, "lm_epoch": 0, "rl_epoch": 0, "step": 0, "total_data_token": 0, "total_rl_token": 0, "total_lm_token": 0, "total_token": 0, "completed_steps": 0, "tune_objective": 0.802525862403975, "timestamp": 1771999996, "checkpoint_dir_name": null, "done": false, "training_iteration": 1, "trial_id": "8e2a9d74", "date": "2026-02-25_14-13-16", "time_this_iter_s": 84.02033615112305, "time_total_s": 84.02033615112305, "pid": 2000857, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.348280208702833, "len_pen": 1.0, "accuracy_w2": 7.505241622349544, "len_pen2": 1.0, "threshold": 0.007501954443620123, 
"similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 84.02033615112305, "iterations_since_restore": 1} -{"rollout/num_samples": 160, "rollout/avg_q1_length": 354.84375, "rollout/std_q1_length": 197.0732879638672, "rollout/gen/avg_score": -419.9128112792969, "rollout/gen/std_score": 226.78819274902344, "rollout/gen/avg_r1_length": 51.78125, "rollout/gen/std_r1_length": 12.442731857299805, "rollout/gen/avg_r1_score": -140.0390625, "rollout/gen/std_r1_score": 45.198699951171875, "rollout/gen/avg_r1_accuracy": 0.11827196180820465, "rollout/gen/std_r1_accuracy": 0.028525998815894127, "rollout/gen/avg_r2_length": 409.0625, "rollout/gen/std_r2_length": 155.95106506347656, "rollout/gen/avg_r2_score": -489.8811950683594, "rollout/gen/std_r2_score": 198.04808044433594, "rollout/gen/avg_r2_accuracy": 0.09521419554948807, "rollout/gen/std_r2_accuracy": 0.0337393581867218, "rollout/best_game/query_1": "By . Hugo Gye . PUBLISHED: . 05:38 EST, 8 May 2013 . | . UPDATED: . 06:29 EST, 8 May 2013 . A Muslim convert who plotted to attack soldiers at Wootton Bassett had a daughter shortly before he was sentenced to six years in prison, it was revealed today. Richard Dart's wife gave birth to a baby girl 12 weeks premature, according to his brother Robb Leech, but soon afterwards the child's father was jailed for preparing acts of terrorism. Filmmaker Mr Leech also described how Dart convinced him that he had renounced terror - only to be caught planning to team up with the Taliban and assassinate British spy chiefs. Brothers: Richard Dart, left, with Robb Leech before he converted to Islam and started a terror plot . Threats: Dart posted chilling online videos in which he railed against the West after his conversion . Writing in The Times today, Mr Leech said: 'While he was in custody before the trial, Rich's new wife gave birth to a daughter, 12 weeks early. 'Now healthy with a life full of possibilities ahead of her, she will need a father and someone to guide her through the world. My only hope is that when Rich gets out, his little daughter will be his calling.' 
Mr Leech previously made a documentary, My Brother the Islamist, about how his stepbrother Dart, 30, went from being a normal Western young man who worked as a BBC security guard to an Islamic radical who wandered the streets of London warning locals they faced 'hellfire'. He spent time with Dart and his fellow jihadists, who incessantly talked about how they longed to go in Afghanistan and fight against the West's troops. 'They all talk about it, all the time,' he wrote. 'It's something to aspire to - fighting jihad and dying a martyr is like winning The X Factor for them.' Kids: Rich and Robb playing together as children after their parents married each other in 1992 . Documentary: Mr Leech, left, made a film about Dart called My Brother the Islamist in 2011 . Jailed: Dart was sentenced to six years in prison last month after admitting to the terrorist plot . But after his film was broadcast in 2011, Mr Leech believed that Dart - who as a teenager taught him how to smoke and told him which clothes to wear if he wanted to be cool - had turned away from violence. When they met, rather than trying to convert his brother to Islam, Dart would talk about his plan to set up a business selling a special paste for Muslims designed to keep away evil spirits. So when he was arrested in July last year, Mr Leech assumed the police had made a mistake - but eventually he realised that his brother had been deceiving him. In March, Dart pleaded guilty to conspiring with two other Islamists to attack soldiers at Royal Wootton Bassett and target the heads of MI5 and MI6. Mr Leech wrote in The Times that when he saw his brother in the dock at the Old Bailey, he was suddenly seized with 'a profound and ridiculous giggle' as he was overcome with emotion. Catching his brother's eye, he wrote, 'We became naughty kids at the back of the class whose hilarity intensifies as the teacher becomes more angry. 'It was like he was saying: \"Oops, did I not mention any of this?\"' When Dart was sentenced last month, he refused to stand in the dock, telling the judge: 'Judgment is only for Allah.' Co-conspirators Jahangir Alom, 26, a former Met police community support officer, and unemployed Imran Mahmood, 22, also received prison sentences over the terror plans.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: --------------------------You Can Not Heave Aziz-Salman Ali ------------------------- Zaiz Saleh Ali ------------------------- Ali closed the book https://www.freepress.com/authors/zaiz-salman-ali?vel_id=AHAAC\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " --------------------------Unlike many in parliament, the USCongress continues to operate in secrecy. Even a word to human relations is often used,\nMove on! -------------------------\nShort Course Spreads ------------------------+", "rollout/best_game/whole_game": "ROLE 0: By . Hugo Gye . PUBLISHED: . 05:38 EST, 8 May 2013 . | . UPDATED: . 06:29 EST, 8 May 2013 . A Muslim convert who plotted to attack soldiers at Wootton Bassett had a daughter shortly before he was sentenced to six years in prison, it was revealed today. Richard Dart's wife gave birth to a baby girl 12 weeks premature, according to his brother Robb Leech, but soon afterwards the child's father was jailed for preparing acts of terrorism. Filmmaker Mr Leech also described how Dart convinced him that he had renounced terror - only to be caught planning to team up with the Taliban and assassinate British spy chiefs. 
Brothers: Richard Dart, left, with Robb Leech before he converted to Islam and started a terror plot . Threats: Dart posted chilling online videos in which he railed against the West after his conversion . Writing in The Times today, Mr Leech said: 'While he was in custody before the trial, Rich's new wife gave birth to a daughter, 12 weeks early. 'Now healthy with a life full of possibilities ahead of her, she will need a father and someone to guide her through the world. My only hope is that when Rich gets out, his little daughter will be his calling.' Mr Leech previously made a documentary, My Brother the Islamist, about how his stepbrother Dart, 30, went from being a normal Western young man who worked as a BBC security guard to an Islamic radical who wandered the streets of London warning locals they faced 'hellfire'. He spent time with Dart and his fellow jihadists, who incessantly talked about how they longed to go in Afghanistan and fight against the West's troops. 'They all talk about it, all the time,' he wrote. 'It's something to aspire to - fighting jihad and dying a martyr is like winning The X Factor for them.' Kids: Rich and Robb playing together as children after their parents married each other in 1992 . Documentary: Mr Leech, left, made a film about Dart called My Brother the Islamist in 2011 . Jailed: Dart was sentenced to six years in prison last month after admitting to the terrorist plot . But after his film was broadcast in 2011, Mr Leech believed that Dart - who as a teenager taught him how to smoke and told him which clothes to wear if he wanted to be cool - had turned away from violence. When they met, rather than trying to convert his brother to Islam, Dart would talk about his plan to set up a business selling a special paste for Muslims designed to keep away evil spirits. So when he was arrested in July last year, Mr Leech assumed the police had made a mistake - but eventually he realised that his brother had been deceiving him. In March, Dart pleaded guilty to conspiring with two other Islamists to attack soldiers at Royal Wootton Bassett and target the heads of MI5 and MI6. Mr Leech wrote in The Times that when he saw his brother in the dock at the Old Bailey, he was suddenly seized with 'a profound and ridiculous giggle' as he was overcome with emotion. Catching his brother's eye, he wrote, 'We became naughty kids at the back of the class whose hilarity intensifies as the teacher becomes more angry. 'It was like he was saying: \"Oops, did I not mention any of this?\"' When Dart was sentenced last month, he refused to stand in the dock, telling the judge: 'Judgment is only for Allah.' Co-conspirators Jahangir Alom, 26, a former Met police community support officer, and unemployed Imran Mahmood, 22, also received prison sentences over the terror plans.\nTL;DR: \nROLE 1: Given the text: --------------------------You Can Not Heave Aziz-Salman Ali ------------------------- Zaiz Saleh Ali ------------------------- Ali closed the book https://www.freepress.com/authors/zaiz-salman-ali?vel_id=AHAAC\nReconstruct the summarized text to the detailed:\nROLE 2: --------------------------Unlike many in parliament, the USCongress continues to operate in secrecy. Even a word to human relations is often used,\nMove on! 
-------------------------\nShort Course Spreads ------------------------+", "rollout/best_game/overall_score": -203.22317743406506, "rollout/best_game/accuracy (r2)": 0.01465201465201465, "_this_batch_num_rl_token": 11355, "num_rl_rollout": 1, "lm_epoch": 0, "rl_epoch": 0, "step": 0, "total_data_token": 11355, "total_rl_token": 0, "total_lm_token": 0, "total_token": 0, "completed_steps": 0, "rollout/num_train_sample": 160, "timestamp": 1772000008, "checkpoint_dir_name": null, "done": false, "training_iteration": 2, "trial_id": "8e2a9d74", "date": "2026-02-25_14-13-28", "time_this_iter_s": 11.95687484741211, "time_total_s": 95.97721099853516, "pid": 2000857, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.348280208702833, "len_pen": 1.0, "accuracy_w2": 7.505241622349544, "len_pen2": 1.0, "threshold": 0.007501954443620123, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": 
"/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 95.97721099853516, "iterations_since_restore": 2} -{"rollout/num_samples": 160, "rollout/avg_q1_length": 348.9375, "rollout/std_q1_length": 206.10519409179688, "rollout/gen/avg_score": -715.2359008789062, "rollout/gen/std_score": 1689.1280517578125, "rollout/gen/avg_r1_length": 53.03125, "rollout/gen/std_r1_length": 9.053547859191895, "rollout/gen/avg_r1_score": -128.81808471679688, "rollout/gen/std_r1_score": 49.80692672729492, "rollout/gen/avg_r1_accuracy": 0.11153872311115265, "rollout/gen/std_r1_accuracy": 0.03276694566011429, "rollout/gen/avg_r2_length": 403.6328125, "rollout/gen/std_r2_length": 170.04153442382812, "rollout/gen/avg_r2_score": -861.8403930664062, "rollout/gen/std_r2_score": 1860.9520263671875, "rollout/gen/avg_r2_accuracy": 0.08576149493455887, "rollout/gen/std_r2_accuracy": 0.03966589644551277, "rollout/best_game/query_1": "Storm chaser and photographer, Xavier Delorme, experienced a brush with death when a bolt struck the ground just 30 metres from where he was standing. Adrenaline-seeking Delorme, 31, has been following thunderstorms around France for years, trying to get as close as possible to take such incredible shots. However, it was this storm in Montpellier, during which Xavier found himself just metres away from a huge bolt. Storm chasing photographer Xavier Delorme experienced his second brush with death in Montpellier, France . While snapping the shocking lightning storm, the photographer came just 30m away from one potentially lethal bolt . The French photographer, who travels over 20,000 km every year to captures these shots, is originally from Nimes in the South of France, and says that the shocking scenes are a great reminder of how unpredictable and fierce lightning can be. 'It was 4am and I had just experienced the most terrible hunt for storms in my life,' he explains. 'It started badly because I missed two important storms in Mende and Millau. 'I decided to head towards an enormous thunderstorm north of Montpellier. 'The conditions were horrendous in Montpellier. It was Armageddon! I barely had time to put down my equipment - an SLR and camcorder - before the light show began.' 'The conditions were horrendous in Montpellier,' he says. 'It was Armageddon!' 'I could feel the hair on the back of my neck stand up,' he says of the bolt's near-miss . Flashes began to erupt all over the sky - and then the big one came. 'This bolt illuminated the whole sky and struck only 30 metres away,' he says. 'I could feel the hair on the back of my neck stand up.' This isn't Xavier Delorme's first brush with death. In August, he faced a similarly terrifying bolt in Morbihan, France. But he isn't letting these instances keep him down. Xavier admits that while he won't stop chasing exciting storms, 30m is 'maybe the closest' he'd like to get . 'I hope to have many exciting times as I travel to find more storms, but maybe that was the closest I would like to get,' he admits. 'I was there until 3am and the rain was still falling heavily. 'It was time to pack up and leave, but it was a big hassle to get out of the pathway as everything was flooded. 
'I kept calm and began to make my way back south, away from the horrific weather.'\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00c2curvy storm pictures captured from a shot, individual or individual, a LOT. \u00a0you will find your own collaboration pics of these. plus Click here to see the most popular before using the hashtag rainbowshunt .\u00a0These are drawn by Max Olson\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \"from Vortex gallery\" (included above illustration)\n\u00a0the old stairs suggested by our purpose in life was correctly found from interactivity and consistency", "rollout/best_game/whole_game": "ROLE 0: Storm chaser and photographer, Xavier Delorme, experienced a brush with death when a bolt struck the ground just 30 metres from where he was standing. Adrenaline-seeking Delorme, 31, has been following thunderstorms around France for years, trying to get as close as possible to take such incredible shots. However, it was this storm in Montpellier, during which Xavier found himself just metres away from a huge bolt. Storm chasing photographer Xavier Delorme experienced his second brush with death in Montpellier, France . While snapping the shocking lightning storm, the photographer came just 30m away from one potentially lethal bolt . The French photographer, who travels over 20,000 km every year to captures these shots, is originally from Nimes in the South of France, and says that the shocking scenes are a great reminder of how unpredictable and fierce lightning can be. 'It was 4am and I had just experienced the most terrible hunt for storms in my life,' he explains. 'It started badly because I missed two important storms in Mende and Millau. 'I decided to head towards an enormous thunderstorm north of Montpellier. 'The conditions were horrendous in Montpellier. It was Armageddon! I barely had time to put down my equipment - an SLR and camcorder - before the light show began.' 'The conditions were horrendous in Montpellier,' he says. 'It was Armageddon!' 'I could feel the hair on the back of my neck stand up,' he says of the bolt's near-miss . Flashes began to erupt all over the sky - and then the big one came. 'This bolt illuminated the whole sky and struck only 30 metres away,' he says. 'I could feel the hair on the back of my neck stand up.' This isn't Xavier Delorme's first brush with death. In August, he faced a similarly terrifying bolt in Morbihan, France. But he isn't letting these instances keep him down. Xavier admits that while he won't stop chasing exciting storms, 30m is 'maybe the closest' he'd like to get . 'I hope to have many exciting times as I travel to find more storms, but maybe that was the closest I would like to get,' he admits. 'I was there until 3am and the rain was still falling heavily. 'It was time to pack up and leave, but it was a big hassle to get out of the pathway as everything was flooded. 'I kept calm and began to make my way back south, away from the horrific weather.'\nTL;DR: \nROLE 1: Given the text: \u00c2curvy storm pictures captured from a shot, individual or individual, a LOT. \u00a0you will find your own collaboration pics of these. 
plus Click here to see the most popular before using the hashtag rainbowshunt .\u00a0These are drawn by Max Olson\nReconstruct the summarized text to the detailed:\nROLE 2: \"from Vortex gallery\" (included above illustration)\n\u00a0the old stairs suggested by our purpose in life was correctly found from interactivity and consistency", "rollout/best_game/overall_score": -87.8463409735885, "rollout/best_game/accuracy (r2)": 0.02135774218154081, "_this_batch_num_rl_token": 11166, "num_rl_rollout": 2, "lm_epoch": 0, "rl_epoch": 0, "step": 40, "total_data_token": 86360, "total_rl_token": 50798, "total_lm_token": 63839, "total_token": 114637, "completed_steps": 40, "rollout/num_train_sample": 160, "timestamp": 1772000029, "checkpoint_dir_name": null, "done": false, "training_iteration": 3, "trial_id": "8e2a9d74", "date": "2026-02-25_14-13-49", "time_this_iter_s": 21.76770305633545, "time_total_s": 117.7449140548706, "pid": 2000857, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.348280208702833, "len_pen": 1.0, "accuracy_w2": 7.505241622349544, "len_pen2": 1.0, "threshold": 0.007501954443620123, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": 
-1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 117.7449140548706, "iterations_since_restore": 3} -{"rollout/num_samples": 160, "rollout/avg_q1_length": 344.625, "rollout/std_q1_length": 211.41500854492188, "rollout/gen/avg_score": -526.0184936523438, "rollout/gen/std_score": 1095.7274169921875, "rollout/gen/avg_r1_length": 46.125, "rollout/gen/std_r1_length": 15.395328521728516, "rollout/gen/avg_r1_score": -109.3932876586914, "rollout/gen/std_r1_score": 62.7203254699707, "rollout/gen/avg_r1_accuracy": 0.11598710715770721, "rollout/gen/std_r1_accuracy": 0.03228633105754852, "rollout/gen/avg_r2_length": 396.5, "rollout/gen/std_r2_length": 174.92445373535156, "rollout/gen/avg_r2_score": -630.1748046875, "rollout/gen/std_r2_score": 1203.12353515625, "rollout/gen/avg_r2_accuracy": 0.09144308418035507, "rollout/gen/std_r2_accuracy": 0.03879760205745697, "rollout/best_game/query_1": "The city council voted in favour of a Public Space Protection Order (PSPO) - a type of power aimed at tackling anti-social behaviour.\nThe authority said the ban would stop people from using \"intoxicating substances\" in the city centre.\nLincolnshire Police welcomed the ban, which is due to start in April, but said it did not go far enough.\nCouncil leader Ric Metcalfe said: \"The council has recognised there is a problem relating to street drinking and the use of new psychoactive substances and welcomes the reform of anti-social behaviour powers.\n\"We want our city centre to be a safe and welcoming place for residents and visitors alike.\"\nThe council said it would work with police to produce an enforcement plan to support the order.\nIn 2014, police recorded 820 incidents in Lincolnshire where the term \"legal highs\" was logged.\nInsp Pat Coates, neighbourhood policing inspector for Lincoln city centre, said he fully supported the council's action but he believed the measures did not go far enough.\n\"It tackles the on-street problem of legal high usage and the anti-social behaviour that we've seen as a consequence of that,\" he said.\n\"We would like to see better legislation to enable us to deal with the actual sellers.\"\nThe Centre for Social Justice (CJS) is also calling for action to tackle the suppliers of legal highs.\nIt wants new police powers to close shops that persist in selling them.\nThe exclusion zone covers a large part of central Lincoln, including the city's football ground, cathedral and shopping areas.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: _____ location and accessibility issues at the expense of the fans (Thank you golly)\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " ____ point for perspective _____\n\nAdvertisements\n\nLike this: Like Loading... 
Related Releases", "rollout/best_game/whole_game": "ROLE 0: The city council voted in favour of a Public Space Protection Order (PSPO) - a type of power aimed at tackling anti-social behaviour.\nThe authority said the ban would stop people from using \"intoxicating substances\" in the city centre.\nLincolnshire Police welcomed the ban, which is due to start in April, but said it did not go far enough.\nCouncil leader Ric Metcalfe said: \"The council has recognised there is a problem relating to street drinking and the use of new psychoactive substances and welcomes the reform of anti-social behaviour powers.\n\"We want our city centre to be a safe and welcoming place for residents and visitors alike.\"\nThe council said it would work with police to produce an enforcement plan to support the order.\nIn 2014, police recorded 820 incidents in Lincolnshire where the term \"legal highs\" was logged.\nInsp Pat Coates, neighbourhood policing inspector for Lincoln city centre, said he fully supported the council's action but he believed the measures did not go far enough.\n\"It tackles the on-street problem of legal high usage and the anti-social behaviour that we've seen as a consequence of that,\" he said.\n\"We would like to see better legislation to enable us to deal with the actual sellers.\"\nThe Centre for Social Justice (CJS) is also calling for action to tackle the suppliers of legal highs.\nIt wants new police powers to close shops that persist in selling them.\nThe exclusion zone covers a large part of central Lincoln, including the city's football ground, cathedral and shopping areas.\nTL;DR: \nROLE 1: Given the text: _____ location and accessibility issues at the expense of the fans (Thank you golly)\nReconstruct the summarized text to the detailed:\nROLE 2: ____ point for perspective _____\n\nAdvertisements\n\nLike this: Like Loading... 
Related Releases", "rollout/best_game/overall_score": -43.401829261970065, "rollout/best_game/accuracy (r2)": 0.012254901960784312, "_this_batch_num_rl_token": 11028, "num_rl_rollout": 3, "lm_epoch": 0, "rl_epoch": 0, "step": 80, "total_data_token": 162393, "total_rl_token": 103151, "total_lm_token": 128844, "total_token": 231995, "completed_steps": 80, "rollout/num_train_sample": 160, "timestamp": 1772000052, "checkpoint_dir_name": null, "done": false, "training_iteration": 4, "trial_id": "8e2a9d74", "date": "2026-02-25_14-14-12", "time_this_iter_s": 22.13875412940979, "time_total_s": 139.8836681842804, "pid": 2000857, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.348280208702833, "len_pen": 1.0, "accuracy_w2": 7.505241622349544, "len_pen2": 1.0, "threshold": 0.007501954443620123, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": 
"/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 139.8836681842804, "iterations_since_restore": 4} -{"rl_info/A2G": 0.00825701467692852, "rl_info/entropy": 2.993694543838501, "rl_info/total_token": 835.0, "rl_info/advantage_b4_norm": -582.863525390625, "rl_info/advantage_after_gnorm": 0.3941856622695923, "rl_info/kl_w_ref": 0.0, "train/rl_loss": -0.8260008692741394, "train/lm_loss": 6.751130104064941, "train/total_loss": 5.925129413604736, "gigaword/rouge1": 0.016742701847684074, "gigaword/rouge2": 0.003441013157636506, "gigaword/rougeL": 0.016595140872074317, "gigaword/rougeLsum": 0.015685252902056062, "gigaword/bertscore_precision": 0.5703791370987892, "gigaword/bertscore_recall": 0.6500895515084266, "gigaword/bertscore_f1": 0.606925301104784, "cnndm/rouge1": 0.15312917942774656, "cnndm/rouge2": 0.04995152763132344, "cnndm/rougeL": 0.10950050108670452, "cnndm/rougeLsum": 0.1268853969558347, "cnndm/bertscore_precision": 0.6818326065937678, "cnndm/bertscore_recall": 0.7625597367684046, "cnndm/bertscore_f1": 0.7187455942233404, "xsum/rouge1": 0.09477250444254975, "xsum/rouge2": 0.018553337609002556, "xsum/rougeL": 0.07387138589465349, "xsum/rougeLsum": 0.06936036064770383, "xsum/bertscore_precision": 0.6905907591183981, "xsum/bertscore_recall": 0.7134289890527725, "xsum/bertscore_f1": 0.6980059444904327, "samsum/rouge1": 0.059016980930838465, "samsum/rouge2": 0.01552071832148729, "samsum/rougeL": 0.05260676205007555, "samsum/rougeLsum": 0.041273076498417466, "samsum/bertscore_precision": 0.6214561263720194, "samsum/bertscore_recall": 0.678465281923612, "samsum/bertscore_f1": 0.6479867299397787, "eval_agg/avg_all_rougef": 0.05730661501723679, "eval_agg/avg_all_bertf": 0.6679158924395839, "eval_agg/avg_all": 0.36261125372841035, "num_rl_rollout": 3, "lm_epoch": 0, "rl_epoch": 0, "step": 100, "total_data_token": 194375, "total_rl_token": 128051, "total_lm_token": 160826, "total_token": 288877, "completed_steps": 100, "tune_objective": 0.8231032319645226, "timestamp": 1772000066, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 5, "trial_id": "8e2a9d74", "date": "2026-02-25_14-14-26", "time_this_iter_s": 14.22416090965271, "time_total_s": 154.1078290939331, "pid": 2000857, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, 
"mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.348280208702833, "len_pen": 1.0, "accuracy_w2": 7.505241622349544, "len_pen2": 1.0, "threshold": 0.007501954443620123, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 154.1078290939331, "iterations_since_restore": 5} -{"rollout/num_samples": 160, "rollout/avg_q1_length": 311.90625, "rollout/std_q1_length": 199.65994262695312, "rollout/gen/avg_score": -470.2471618652344, "rollout/gen/std_score": 794.8897705078125, "rollout/gen/avg_r1_length": 50.78125, "rollout/gen/std_r1_length": 10.444119453430176, "rollout/gen/avg_r1_score": -123.47400665283203, "rollout/gen/std_r1_score": 54.4940185546875, "rollout/gen/avg_r1_accuracy": 0.1047220453619957, "rollout/gen/std_r1_accuracy": 0.038764793425798416, "rollout/gen/avg_r2_length": 396.1640625, "rollout/gen/std_r2_length": 172.8953857421875, "rollout/gen/avg_r2_score": -556.9404907226562, "rollout/gen/std_r2_score": 867.4425048828125, "rollout/gen/avg_r2_accuracy": 0.08309996128082275, "rollout/gen/std_r2_accuracy": 0.039804957807064056, "rollout/best_game/query_1": "GP3 driver Konstantin Tereshchenko survived a terrifying crash at Spa that saw his car fly into the air, roll and then burst into flames. The 20-year-old caught the kerbat turn 18 during a practice session at the Belgian Grand Prix and his car was sent out of control. Tereshchenko flew threw the air, over the track and on to the run-off area, before his car rolled as it landed. The car then erupted into flames as it came to rest near a barrier and Tereshchenko quickly jumped out unharmed. The young Russian driver was making his debut in the GP3 series for the Trident team. 
SCROLL DOWN FOR VIDEO . Flying: Konstantin Tereshchenko's car soared threw the air after clipping a kerb at Spa . Landing: The 20-year-old's car flipped as it touched the ground on the run off area . Survival: Tereshchenko was making his debut in the GP3 series for the Trident team . Heated: The Russian escaped from his car as it burst into flames when coming to rest near a barrier .\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Rob 5 own tragedy why Created by: emmets 5 have subsidised sped racing each month Deep the Middle Hereditary Challenge: Jake509728 Top 5 \u00a0 person best friend: Robert Mr Hands My favourite manicures:\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": "\nMet Peter Dale Scott II winners? Expect silver medals next week I do, James Scott Loss pre-race events. My goal now is to get Nico Nico Academics Mercedes Benz", "rollout/best_game/whole_game": "ROLE 0: GP3 driver Konstantin Tereshchenko survived a terrifying crash at Spa that saw his car fly into the air, roll and then burst into flames. The 20-year-old caught the kerbat turn 18 during a practice session at the Belgian Grand Prix and his car was sent out of control. Tereshchenko flew threw the air, over the track and on to the run-off area, before his car rolled as it landed. The car then erupted into flames as it came to rest near a barrier and Tereshchenko quickly jumped out unharmed. The young Russian driver was making his debut in the GP3 series for the Trident team. SCROLL DOWN FOR VIDEO . Flying: Konstantin Tereshchenko's car soared threw the air after clipping a kerb at Spa . Landing: The 20-year-old's car flipped as it touched the ground on the run off area . Survival: Tereshchenko was making his debut in the GP3 series for the Trident team . Heated: The Russian escaped from his car as it burst into flames when coming to rest near a barrier .\nTL;DR: \nROLE 1: Given the text: \u00a0Rob 5 own tragedy why Created by: emmets 5 have subsidised sped racing each month Deep the Middle Hereditary Challenge: Jake509728 Top 5 \u00a0 person best friend: Robert Mr Hands My favourite manicures:\nReconstruct the summarized text to the detailed:\nROLE 2: \nMet Peter Dale Scott II winners? Expect silver medals next week I do, James Scott Loss pre-race events. 
My goal now is to get Nico Nico Academics Mercedes Benz", "rollout/best_game/overall_score": -89.04985600724953, "rollout/best_game/accuracy (r2)": 0.006410256410256409, "_this_batch_num_rl_token": 9981, "num_rl_rollout": 4, "lm_epoch": 0, "rl_epoch": 0, "step": 120, "total_data_token": 231524, "total_rl_token": 153515, "total_lm_token": 187994, "total_token": 341509, "completed_steps": 120, "rollout/num_train_sample": 160, "timestamp": 1772000082, "checkpoint_dir_name": null, "done": false, "training_iteration": 6, "trial_id": "8e2a9d74", "date": "2026-02-25_14-14-42", "time_this_iter_s": 16.147478818893433, "time_total_s": 170.25530791282654, "pid": 2000857, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.348280208702833, "len_pen": 1.0, "accuracy_w2": 7.505241622349544, "len_pen2": 1.0, "threshold": 0.007501954443620123, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": 
"/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 170.25530791282654, "iterations_since_restore": 6} -{"rollout/num_samples": 160, "rollout/avg_q1_length": 407.28125, "rollout/std_q1_length": 207.5860137939453, "rollout/gen/avg_score": -545.8427124023438, "rollout/gen/std_score": 1092.8277587890625, "rollout/gen/avg_r1_length": 49.84375, "rollout/gen/std_r1_length": 13.188727378845215, "rollout/gen/avg_r1_score": -128.72021484375, "rollout/gen/std_r1_score": 54.17051696777344, "rollout/gen/avg_r1_accuracy": 0.1204671859741211, "rollout/gen/std_r1_accuracy": 0.030183514580130577, "rollout/gen/avg_r2_length": 407.375, "rollout/gen/std_r2_length": 168.80072021484375, "rollout/gen/avg_r2_score": -650.123291015625, "rollout/gen/std_r2_score": 1199.8646240234375, "rollout/gen/avg_r2_accuracy": 0.0932493805885315, "rollout/gen/std_r2_accuracy": 0.0389903299510479, "rollout/best_game/query_1": "Mr Megraw was one of the 16 murder victims who became known as the Disappeared.\nHis remains were found in a drainage ditch on Oristown bog, near Kells, by contractors called in to prepare the site for forensic excavations.\nDNA tests have positively identified the remains as those of Mr Megraw.\nThe coroner for the city of Dublin has accepted this as evidence of identification and will shortly authorise the release of the remains to the family.\nIn a statement, Brendan Megraw's brother, Kieran, said the family are relieved that he has been found.\n\"He has been alone for nearly 40 years and now we can bring him home and lay him to rest with our mum and dad,\" he said.\n\"We want to thank all those who have supported us over the years.\"\nHe added: \"Brendan was found because more information came in to refine the search area and we are hugely grateful for whoever provided it.\n\"We hope and pray that the suffering of those still waiting for the return of their loved ones will soon be brought to an end\".\nMr Megraw was 23 when he was abducted from Twinbrook in Belfast in 1978, and murdered by the IRA.\nHe had recently been married and was awaiting the birth of his daughter.\nHis kidnappers had drugged his wife, Marie, in their home as they waited for his return, and as they took him away they warned her not to worry or contact police.\nThe Megraw family were only told by the IRA in 1999 that he was one of the Disappeared and his body had been dumped on the bogland near the town of Kells in County Meath.\nThree previous searches for Mr Megraw, the most recent in 2010, were unsuccessful.\nThe Disappeared were abducted, murdered and secretly buried by republican paramilitaries during the Troubles.\nSeparate searches have also taken place on bogland a few miles away from where Mr Megraw was buried in County Meath for the remains of Kevin McKee and Seamus Wright, both of whom were abducted by the IRA in October 1972.\nIt is also suspected Joseph Lynskey, a former Cistercian monk taken from the Beechmount area of west Belfast in the summer of 1972, was also buried somewhere in the region.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: __________\nNicolhos Zainous PhotographyNick Stokes=o--idea of grief and impatience_______________________________________________________\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " __________Nicolhos 
Zainous PhotographyThanks to: Santa Gabandas, Markusa Schroeder, Mathias von Mohr, Jansen Baumann, Goethe, Thomas Holtzmann, Ivar Garonne, Bob Cole, Bob Rafferty, Stefan Rebert, Eliza Hassoglou, Joshua Kowalski, Karl Nagel, Martin Glazerbacker, Johannes Herrmann , Paul Harmerstein, Hans Wittlich , Paz Kassian, Dani\u00ebl Kopijave, Daannd Sawaryal, Yousef Steiner , Brian Zelikovsky _______________________________________________Source-Medium rate, 11 min. Helpful 1000 words, signed work by Egor Holstspirit. Created August 31, 2010.", "rollout/best_game/whole_game": "ROLE 0: Mr Megraw was one of the 16 murder victims who became known as the Disappeared.\nHis remains were found in a drainage ditch on Oristown bog, near Kells, by contractors called in to prepare the site for forensic excavations.\nDNA tests have positively identified the remains as those of Mr Megraw.\nThe coroner for the city of Dublin has accepted this as evidence of identification and will shortly authorise the release of the remains to the family.\nIn a statement, Brendan Megraw's brother, Kieran, said the family are relieved that he has been found.\n\"He has been alone for nearly 40 years and now we can bring him home and lay him to rest with our mum and dad,\" he said.\n\"We want to thank all those who have supported us over the years.\"\nHe added: \"Brendan was found because more information came in to refine the search area and we are hugely grateful for whoever provided it.\n\"We hope and pray that the suffering of those still waiting for the return of their loved ones will soon be brought to an end\".\nMr Megraw was 23 when he was abducted from Twinbrook in Belfast in 1978, and murdered by the IRA.\nHe had recently been married and was awaiting the birth of his daughter.\nHis kidnappers had drugged his wife, Marie, in their home as they waited for his return, and as they took him away they warned her not to worry or contact police.\nThe Megraw family were only told by the IRA in 1999 that he was one of the Disappeared and his body had been dumped on the bogland near the town of Kells in County Meath.\nThree previous searches for Mr Megraw, the most recent in 2010, were unsuccessful.\nThe Disappeared were abducted, murdered and secretly buried by republican paramilitaries during the Troubles.\nSeparate searches have also taken place on bogland a few miles away from where Mr Megraw was buried in County Meath for the remains of Kevin McKee and Seamus Wright, both of whom were abducted by the IRA in October 1972.\nIt is also suspected Joseph Lynskey, a former Cistercian monk taken from the Beechmount area of west Belfast in the summer of 1972, was also buried somewhere in the region.\nTL;DR: \nROLE 1: Given the text: __________\nNicolhos Zainous PhotographyNick Stokes=o--idea of grief and impatience_______________________________________________________\nReconstruct the summarized text to the detailed:\nROLE 2: __________Nicolhos Zainous PhotographyThanks to: Santa Gabandas, Markusa Schroeder, Mathias von Mohr, Jansen Baumann, Goethe, Thomas Holtzmann, Ivar Garonne, Bob Cole, Bob Rafferty, Stefan Rebert, Eliza Hassoglou, Joshua Kowalski, Karl Nagel, Martin Glazerbacker, Johannes Herrmann , Paul Harmerstein, Hans Wittlich , Paz Kassian, Dani\u00ebl Kopijave, Daannd Sawaryal, Yousef Steiner , Brian Zelikovsky _______________________________________________Source-Medium rate, 11 min. Helpful 1000 words, signed work by Egor Holstspirit. 
Created August 31, 2010.", "rollout/best_game/overall_score": -189.71402459090018, "rollout/best_game/accuracy (r2)": 0.008968609865470852, "_this_batch_num_rl_token": 13033, "num_rl_rollout": 5, "lm_epoch": 0, "rl_epoch": 0, "step": 160, "total_data_token": 307172, "total_rl_token": 206239, "total_lm_token": 250609, "total_token": 456848, "completed_steps": 160, "rollout/num_train_sample": 160, "timestamp": 1772000104, "checkpoint_dir_name": null, "done": false, "training_iteration": 7, "trial_id": "8e2a9d74", "date": "2026-02-25_14-15-04", "time_this_iter_s": 22.20146417617798, "time_total_s": 192.45677208900452, "pid": 2000857, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.348280208702833, "len_pen": 1.0, "accuracy_w2": 7.505241622349544, "len_pen2": 1.0, "threshold": 0.007501954443620123, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": 
"/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 192.45677208900452, "iterations_since_restore": 7} -{"rl_info/A2G": -0.002398526296019554, "rl_info/entropy": 3.034477472305298, "rl_info/total_token": 1450.0, "rl_info/advantage_b4_norm": -418.8110656738281, "rl_info/advantage_after_gnorm": 0.03449975699186325, "rl_info/kl_w_ref": 0.0, "train/rl_loss": 0.2395491898059845, "train/lm_loss": 7.669000148773193, "train/total_loss": 7.9085493087768555, "gigaword/rouge1": 0.01644014181715206, "gigaword/rouge2": 0.002911858811769318, "gigaword/rougeL": 0.016247148568490383, "gigaword/rougeLsum": 0.015488518873864442, "gigaword/bertscore_precision": 0.5697136095166206, "gigaword/bertscore_recall": 0.6516547846794128, "gigaword/bertscore_f1": 0.6071493943035603, "cnndm/rouge1": 0.10557082449436699, "cnndm/rouge2": 0.030450721321285536, "cnndm/rougeL": 0.08404104172554488, "cnndm/rougeLsum": 0.09175364174541716, "cnndm/bertscore_precision": 0.6598478257656097, "cnndm/bertscore_recall": 0.727394183476766, "cnndm/bertscore_f1": 0.6912071158488592, "xsum/rouge1": 0.12184816411632697, "xsum/rouge2": 0.015368479742243849, "xsum/rougeL": 0.10261010442805257, "xsum/rougeLsum": 0.09301319321351881, "xsum/bertscore_precision": 0.70936851700147, "xsum/bertscore_recall": 0.7144647091627121, "xsum/bertscore_f1": 0.7087334344784418, "samsum/rouge1": 0.06153679462433529, "samsum/rouge2": 0.012781130766503982, "samsum/rougeL": 0.05501040472860041, "samsum/rougeLsum": 0.04174618882094905, "samsum/bertscore_precision": 0.6318484346071879, "samsum/bertscore_recall": 0.6875239958365759, "samsum/bertscore_f1": 0.6580331176519394, "eval_agg/avg_all_rougef": 0.05417614736240135, "eval_agg/avg_all_bertf": 0.6662807655707002, "eval_agg/avg_all": 0.3602284564665508, "num_rl_rollout": 5, "lm_epoch": 0, "rl_epoch": 0, "step": 200, "total_data_token": 369800, "total_rl_token": 259579, "total_lm_token": 313237, "total_token": 572816, "completed_steps": 200, "tune_objective": 0.8034492002298393, "timestamp": 1772000124, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": true, "training_iteration": 8, "trial_id": "8e2a9d74", "date": "2026-02-25_14-15-24", "time_this_iter_s": 19.738354682922363, "time_total_s": 212.19512677192688, "pid": 2000857, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, 
"mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.348280208702833, "len_pen": 1.0, "accuracy_w2": 7.505241622349544, "len_pen2": 1.0, "threshold": 0.007501954443620123, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 212.19512677192688, "iterations_since_restore": 8} +{"gigaword/rouge1": 0.016009562228265557, "gigaword/rouge2": 0.003056022795887401, "gigaword/rougeL": 0.015873002170997616, "gigaword/rougeLsum": 0.014966453674489327, "gigaword/bertscore_precision": 0.5707311309874058, "gigaword/bertscore_recall": 0.652623221129179, "gigaword/bertscore_f1": 0.6081619510054588, "xsum/rouge1": 0.12240485954893322, "xsum/rouge2": 0.020703411244533874, "xsum/rougeL": 0.10276211214052106, "xsum/rougeLsum": 0.09471663512544302, "xsum/bertscore_precision": 0.7185406535863876, "xsum/bertscore_recall": 0.7206180840730667, "xsum/bertscore_f1": 0.7165939857562383, "samsum/rouge1": 0.0614302960358649, "samsum/rouge2": 0.01363016003924649, "samsum/rougeL": 0.05405409512133013, "samsum/rougeLsum": 0.04035386836817009, "samsum/bertscore_precision": 0.6351983745892843, "samsum/bertscore_recall": 0.6781209856271744, "samsum/bertscore_f1": 0.6556738962729772, "cnndm/rouge1": 0.10217621552932292, "cnndm/rouge2": 0.028340214561100655, "cnndm/rougeL": 0.07993005276205918, "cnndm/rougeLsum": 0.08767832075358839, "cnndm/bertscore_precision": 0.6678036600351334, "cnndm/bertscore_recall": 0.7234405279159546, "cnndm/bertscore_f1": 0.693790078163147, "eval_agg/avg_all_rougef": 0.05363033013123462, "eval_agg/avg_all_bertf": 0.6685549777994554, "eval_agg/avg_all": 0.361092653965345, "num_rl_rollout": 0, "lm_epoch": 0, "rl_epoch": 0, "step": 0, "total_data_token": 0, 
"total_rl_token": 0, "total_lm_token": 0, "total_token": 0, "completed_steps": 0, "tune_objective": 0.8025258611622117, "timestamp": 1772000239, "checkpoint_dir_name": null, "done": false, "training_iteration": 1, "trial_id": "4849cf6b", "date": "2026-02-25_14-17-19", "time_this_iter_s": 94.04087519645691, "time_total_s": 94.04087519645691, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 94.04087519645691, 
"iterations_since_restore": 1} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 354.84375, "rollout/std_q1_length": 197.0732879638672, "rollout/gen/avg_score": -410.3697204589844, "rollout/gen/std_score": 224.22265625, "rollout/gen/avg_r1_length": 51.78125, "rollout/gen/std_r1_length": 12.442731857299805, "rollout/gen/avg_r1_score": -140.468505859375, "rollout/gen/std_r1_score": 45.17983627319336, "rollout/gen/avg_r1_accuracy": 0.11827196180820465, "rollout/gen/std_r1_accuracy": 0.028525998815894127, "rollout/gen/avg_r2_length": 409.0625, "rollout/gen/std_r2_length": 155.95106506347656, "rollout/gen/avg_r2_score": -477.8450012207031, "rollout/gen/std_r2_score": 198.7501983642578, "rollout/gen/avg_r2_accuracy": 0.09521419554948807, "rollout/gen/std_r2_accuracy": 0.0337393581867218, "rollout/best_game/query_1": "By . Hugo Gye . PUBLISHED: . 05:38 EST, 8 May 2013 . | . UPDATED: . 06:29 EST, 8 May 2013 . A Muslim convert who plotted to attack soldiers at Wootton Bassett had a daughter shortly before he was sentenced to six years in prison, it was revealed today. Richard Dart's wife gave birth to a baby girl 12 weeks premature, according to his brother Robb Leech, but soon afterwards the child's father was jailed for preparing acts of terrorism. Filmmaker Mr Leech also described how Dart convinced him that he had renounced terror - only to be caught planning to team up with the Taliban and assassinate British spy chiefs. Brothers: Richard Dart, left, with Robb Leech before he converted to Islam and started a terror plot . Threats: Dart posted chilling online videos in which he railed against the West after his conversion . Writing in The Times today, Mr Leech said: 'While he was in custody before the trial, Rich's new wife gave birth to a daughter, 12 weeks early. 'Now healthy with a life full of possibilities ahead of her, she will need a father and someone to guide her through the world. My only hope is that when Rich gets out, his little daughter will be his calling.' Mr Leech previously made a documentary, My Brother the Islamist, about how his stepbrother Dart, 30, went from being a normal Western young man who worked as a BBC security guard to an Islamic radical who wandered the streets of London warning locals they faced 'hellfire'. He spent time with Dart and his fellow jihadists, who incessantly talked about how they longed to go in Afghanistan and fight against the West's troops. 'They all talk about it, all the time,' he wrote. 'It's something to aspire to - fighting jihad and dying a martyr is like winning The X Factor for them.' Kids: Rich and Robb playing together as children after their parents married each other in 1992 . Documentary: Mr Leech, left, made a film about Dart called My Brother the Islamist in 2011 . Jailed: Dart was sentenced to six years in prison last month after admitting to the terrorist plot . But after his film was broadcast in 2011, Mr Leech believed that Dart - who as a teenager taught him how to smoke and told him which clothes to wear if he wanted to be cool - had turned away from violence. When they met, rather than trying to convert his brother to Islam, Dart would talk about his plan to set up a business selling a special paste for Muslims designed to keep away evil spirits. So when he was arrested in July last year, Mr Leech assumed the police had made a mistake - but eventually he realised that his brother had been deceiving him. 
In March, Dart pleaded guilty to conspiring with two other Islamists to attack soldiers at Royal Wootton Bassett and target the heads of MI5 and MI6. Mr Leech wrote in The Times that when he saw his brother in the dock at the Old Bailey, he was suddenly seized with 'a profound and ridiculous giggle' as he was overcome with emotion. Catching his brother's eye, he wrote, 'We became naughty kids at the back of the class whose hilarity intensifies as the teacher becomes more angry. 'It was like he was saying: \"Oops, did I not mention any of this?\"' When Dart was sentenced last month, he refused to stand in the dock, telling the judge: 'Judgment is only for Allah.' Co-conspirators Jahangir Alom, 26, a former Met police community support officer, and unemployed Imran Mahmood, 22, also received prison sentences over the terror plans.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: --------------------------You Can Not Heave Aziz-Salman Ali ------------------------- Zaiz Saleh Ali ------------------------- Ali closed the book https://www.freepress.com/authors/zaiz-salman-ali?vel_id=AHAAC\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " --------------------------Unlike many in parliament, the USCongress continues to operate in secrecy. Even a word to human relations is often used,\nMove on! -------------------------\nShort Course Spreads ------------------------+", "rollout/best_game/whole_game": "ROLE 0: By . Hugo Gye . PUBLISHED: . 05:38 EST, 8 May 2013 . | . UPDATED: . 06:29 EST, 8 May 2013 . A Muslim convert who plotted to attack soldiers at Wootton Bassett had a daughter shortly before he was sentenced to six years in prison, it was revealed today. Richard Dart's wife gave birth to a baby girl 12 weeks premature, according to his brother Robb Leech, but soon afterwards the child's father was jailed for preparing acts of terrorism. Filmmaker Mr Leech also described how Dart convinced him that he had renounced terror - only to be caught planning to team up with the Taliban and assassinate British spy chiefs. Brothers: Richard Dart, left, with Robb Leech before he converted to Islam and started a terror plot . Threats: Dart posted chilling online videos in which he railed against the West after his conversion . Writing in The Times today, Mr Leech said: 'While he was in custody before the trial, Rich's new wife gave birth to a daughter, 12 weeks early. 'Now healthy with a life full of possibilities ahead of her, she will need a father and someone to guide her through the world. My only hope is that when Rich gets out, his little daughter will be his calling.' Mr Leech previously made a documentary, My Brother the Islamist, about how his stepbrother Dart, 30, went from being a normal Western young man who worked as a BBC security guard to an Islamic radical who wandered the streets of London warning locals they faced 'hellfire'. He spent time with Dart and his fellow jihadists, who incessantly talked about how they longed to go in Afghanistan and fight against the West's troops. 'They all talk about it, all the time,' he wrote. 'It's something to aspire to - fighting jihad and dying a martyr is like winning The X Factor for them.' Kids: Rich and Robb playing together as children after their parents married each other in 1992 . Documentary: Mr Leech, left, made a film about Dart called My Brother the Islamist in 2011 . Jailed: Dart was sentenced to six years in prison last month after admitting to the terrorist plot . 
But after his film was broadcast in 2011, Mr Leech believed that Dart - who as a teenager taught him how to smoke and told him which clothes to wear if he wanted to be cool - had turned away from violence. When they met, rather than trying to convert his brother to Islam, Dart would talk about his plan to set up a business selling a special paste for Muslims designed to keep away evil spirits. So when he was arrested in July last year, Mr Leech assumed the police had made a mistake - but eventually he realised that his brother had been deceiving him. In March, Dart pleaded guilty to conspiring with two other Islamists to attack soldiers at Royal Wootton Bassett and target the heads of MI5 and MI6. Mr Leech wrote in The Times that when he saw his brother in the dock at the Old Bailey, he was suddenly seized with 'a profound and ridiculous giggle' as he was overcome with emotion. Catching his brother's eye, he wrote, 'We became naughty kids at the back of the class whose hilarity intensifies as the teacher becomes more angry. 'It was like he was saying: \"Oops, did I not mention any of this?\"' When Dart was sentenced last month, he refused to stand in the dock, telling the judge: 'Judgment is only for Allah.' Co-conspirators Jahangir Alom, 26, a former Met police community support officer, and unemployed Imran Mahmood, 22, also received prison sentences over the terror plans.\nTL;DR: \nROLE 1: Given the text: --------------------------You Can Not Heave Aziz-Salman Ali ------------------------- Zaiz Saleh Ali ------------------------- Ali closed the book https://www.freepress.com/authors/zaiz-salman-ali?vel_id=AHAAC\nReconstruct the summarized text to the detailed:\nROLE 2: --------------------------Unlike many in parliament, the USCongress continues to operate in secrecy. Even a word to human relations is often used,\nMove on! 
-------------------------\nShort Course Spreads ------------------------+", "rollout/best_game/overall_score": -203.22317743406506, "rollout/best_game/accuracy (r2)": 0.01465201465201465, "_this_batch_num_rl_token": 11355, "num_rl_rollout": 1, "lm_epoch": 0, "rl_epoch": 0, "step": 0, "total_data_token": 11355, "total_rl_token": 0, "total_lm_token": 0, "total_token": 0, "completed_steps": 0, "rollout/num_train_sample": 160, "timestamp": 1772000251, "checkpoint_dir_name": null, "done": false, "training_iteration": 2, "trial_id": "4849cf6b", "date": "2026-02-25_14-17-31", "time_this_iter_s": 11.994565725326538, "time_total_s": 106.03544092178345, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": 
"/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 106.03544092178345, "iterations_since_restore": 2} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 348.9375, "rollout/std_q1_length": 206.10519409179688, "rollout/gen/avg_score": -484.09161376953125, "rollout/gen/std_score": 792.3521728515625, "rollout/gen/avg_r1_length": 49.03125, "rollout/gen/std_r1_length": 11.7266845703125, "rollout/gen/avg_r1_score": -109.65111541748047, "rollout/gen/std_r1_score": 58.99934768676758, "rollout/gen/avg_r1_accuracy": 0.11107651889324188, "rollout/gen/std_r1_accuracy": 0.03802986815571785, "rollout/gen/avg_r2_length": 425.3828125, "rollout/gen/std_r2_length": 155.73251342773438, "rollout/gen/avg_r2_score": -577.7017211914062, "rollout/gen/std_r2_score": 860.8161010742188, "rollout/gen/avg_r2_accuracy": 0.09114097058773041, "rollout/gen/std_r2_accuracy": 0.03926713019609451, "rollout/best_game/query_1": "Karoline: shopping tomorrow?\nTaylor: sure\nKaroline: great, need some stuff :)\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Hopefully the one enhance the shine of the sun, the yellow sun is my new favorite color. Also lets them give it some pearly white coloring. Think ya'll get them? I'm happy you received them from me but I really appreciate what you guys leave there\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \u00a0\nThis might some as interesting as mentioning my granddaughter's cell phone is 7 hours old. Indeed, I could kindly just claim, I am auto-loading a beloved son riding his phone sitting in my car waiting for it to insert: \u00a0 Future \u00a0Written down to the printed version'd Facebook post:\u00a0 \"25ing 350 mg LSD sun june 2016\".", "rollout/best_game/whole_game": "ROLE 0: Karoline: shopping tomorrow?\nTaylor: sure\nKaroline: great, need some stuff :)\nTL;DR: \nROLE 1: Given the text: \u00a0Hopefully the one enhance the shine of the sun, the yellow sun is my new favorite color. Also lets them give it some pearly white coloring. Think ya'll get them? I'm happy you received them from me but I really appreciate what you guys leave there\nReconstruct the summarized text to the detailed:\nROLE 2: \u00a0\nThis might some as interesting as mentioning my granddaughter's cell phone is 7 hours old. 
Indeed, I could kindly just claim, I am auto-loading a beloved son riding his phone sitting in my car waiting for it to insert: \u00a0 Future \u00a0Written down to the printed version'd Facebook post:\u00a0 \"25ing 350 mg LSD sun june 2016\".", "rollout/best_game/overall_score": -133.93182563272433, "rollout/best_game/accuracy (r2)": 0.019607843137254898, "_this_batch_num_rl_token": 11166, "num_rl_rollout": 2, "lm_epoch": 0, "rl_epoch": 0, "step": 20, "total_data_token": 55284, "total_rl_token": 54017, "total_lm_token": 32763, "total_token": 86780, "completed_steps": 20, "rollout/num_train_sample": 160, "timestamp": 1772000273, "checkpoint_dir_name": null, "done": false, "training_iteration": 3, "trial_id": "4849cf6b", "date": "2026-02-25_14-17-53", "time_this_iter_s": 21.074509143829346, "time_total_s": 127.1099500656128, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", 
"tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 127.1099500656128, "iterations_since_restore": 3} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 344.625, "rollout/std_q1_length": 211.41500854492188, "rollout/gen/avg_score": -447.6999206542969, "rollout/gen/std_score": 797.5082397460938, "rollout/gen/avg_r1_length": 47.3125, "rollout/gen/std_r1_length": 14.197768211364746, "rollout/gen/avg_r1_score": -114.16761779785156, "rollout/gen/std_r1_score": 60.868675231933594, "rollout/gen/avg_r1_accuracy": 0.11149488389492035, "rollout/gen/std_r1_accuracy": 0.03260429576039314, "rollout/gen/avg_r2_length": 384.9609375, "rollout/gen/std_r2_length": 179.49107360839844, "rollout/gen/avg_r2_score": -531.0830078125, "rollout/gen/std_r2_score": 871.9715576171875, "rollout/gen/avg_r2_accuracy": 0.08533501625061035, "rollout/gen/std_r2_accuracy": 0.03698955476284027, "rollout/best_game/query_1": "Susan: When are you coming?\nPaul: I'll be there in 1 hour\nPieter: 1,5-2 hours\nSusan: OK\nSusan: I'm already here\nSusan: I'm on the 5th floor\nSusan: South wing \nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0There will be no downtime that bandscore is baiting everyone to confront this already electric place. Hope this makes sense to R&B fans around the world. I'm HAPPY! - Ricky Doblin\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \"Archeologists believe Jupiter is cool beyond compare on many planets and could hold its own cool to take over planets for millions and perhaps trillions of years. It's not working like that; it's not doing it as well.\"", "rollout/best_game/whole_game": "ROLE 0: Susan: When are you coming?\nPaul: I'll be there in 1 hour\nPieter: 1,5-2 hours\nSusan: OK\nSusan: I'm already here\nSusan: I'm on the 5th floor\nSusan: South wing \nTL;DR: \nROLE 1: Given the text: \u00a0There will be no downtime that bandscore is baiting everyone to confront this already electric place. Hope this makes sense to R&B fans around the world. I'm HAPPY! - Ricky Doblin\nReconstruct the summarized text to the detailed:\nROLE 2: \"Archeologists believe Jupiter is cool beyond compare on many planets and could hold its own cool to take over planets for millions and perhaps trillions of years. 
It's not working like that; it's not doing it as well.\"", "rollout/best_game/overall_score": -95.02980604108453, "rollout/best_game/accuracy (r2)": 0.017777777777777778, "_this_batch_num_rl_token": 11028, "num_rl_rollout": 3, "lm_epoch": 0, "rl_epoch": 0, "step": 40, "total_data_token": 97388, "total_rl_token": 110035, "total_lm_token": 63839, "total_token": 173874, "completed_steps": 40, "rollout/num_train_sample": 160, "timestamp": 1772000292, "checkpoint_dir_name": null, "done": false, "training_iteration": 4, "trial_id": "4849cf6b", "date": "2026-02-25_14-18-12", "time_this_iter_s": 19.962165594100952, "time_total_s": 147.07211565971375, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": 
"/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 147.07211565971375, "iterations_since_restore": 4} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 311.90625, "rollout/std_q1_length": 199.65994262695312, "rollout/gen/avg_score": -402.5068054199219, "rollout/gen/std_score": 229.35597229003906, "rollout/gen/avg_r1_length": 50.15625, "rollout/gen/std_r1_length": 13.210721969604492, "rollout/gen/avg_r1_score": -129.54266357421875, "rollout/gen/std_r1_score": 53.7617073059082, "rollout/gen/avg_r1_accuracy": 0.1114177405834198, "rollout/gen/std_r1_accuracy": 0.0435587577521801, "rollout/gen/avg_r2_length": 403.15625, "rollout/gen/std_r2_length": 161.2394561767578, "rollout/gen/avg_r2_score": -470.747802734375, "rollout/gen/std_r2_score": 204.17076110839844, "rollout/gen/avg_r2_accuracy": 0.08981794863939285, "rollout/gen/std_r2_accuracy": 0.04336870461702347, "rollout/best_game/query_1": "(CNN) -- Authorities in Colorado have filed charges against the parents in last month's notorious \"balloon boy\" case, and the pair's lawyers say the two are expected to plead guilty on Friday. The Larimer County district attorney's office Thursday said Richard Heene has been charged with one count of attempting to influence a public servant, a felony, and Mayumi Heene has been charged with one count of false reporting to authorities, a misdemeanor. Richard Heene turned himself in Thursday afternoon and was released on a $5,000 personal recognizance bond, the Latimer County Sheriff's Office said. Mayumi Heene did not appear with him. The Heenes will appear Friday in Larimer County Court, where they are expected to plead guilty, their attorneys said. The penalty range for the felony is two to six years in prison with a fine of $2,000 to $500,000. The range for the misdemeanor would be six months in jail with a fine of $50 to $750, the district attorney's office statement said. The Heenes' attorneys said prosecutors have agreed to a sentence of probation with the possibility of up to 90 days in jail for Richard Heene and up to 60 days in jail for his wife. The threat of deportation for Mayumi Heene was a factor in the plea deal negotiation, the attorney's statement said. \"Mayumi Heene is a citizen of Japan. As such, any felony conviction or certain misdemeanors would result in her deportation, even though her husband and children are Americans,\" the statement said. \"It is supremely ironic that law enforcement has expressed such grave concern over the welfare of the children, but it was ultimately the threat of taking the children's mother from the family and deporting her to Japan which fueled this deal.\" Prosecutors in the case could not be immediately reached for comment. On October 15, a large silver balloon came loose from moorings in the Heenes' yard and floated over Colorado. Mayumi Heene called 911 and said the couple's 6-year-old son Falcon was inside the craft. Millions of people across the country watched the saga on television for nearly two hours as military aircraft tracked the balloon in the air and rescuers chased it on the ground. Mayumi Heene later admitted the whole thing was a hoax and that Falcon was safe in their home the whole time, authorities said. Watch the moment the hoax was revealed . 
Court documents released last month said the couple hatched the plan about two weeks before the incident and \"instructed their three children to lie to authorities as well as the media regarding this hoax.\" Their motive? To \"make the Heene family more marketable for future media interests,\" the documents said.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0The Real Richard Heene Fouted His Child Tortures Quest after the Hook On 12-Year-Old Falcon Given All The Facebook Psyops Dude Such A Lousy Repair Clean Site Nor Is It Monday Their War Over Entertainment Since the Travel Grab Is Triumphant By 2018\u00a0 Disney\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \u00a0Dick: \u00a0I wonder if they should implement \"bounty hunter\" laws if there isn't one to hopefully \"kill it like it counts for.\" A storm of accusations against the Disneyns and Cranky Publicns though...", "rollout/best_game/whole_game": "ROLE 0: (CNN) -- Authorities in Colorado have filed charges against the parents in last month's notorious \"balloon boy\" case, and the pair's lawyers say the two are expected to plead guilty on Friday. The Larimer County district attorney's office Thursday said Richard Heene has been charged with one count of attempting to influence a public servant, a felony, and Mayumi Heene has been charged with one count of false reporting to authorities, a misdemeanor. Richard Heene turned himself in Thursday afternoon and was released on a $5,000 personal recognizance bond, the Latimer County Sheriff's Office said. Mayumi Heene did not appear with him. The Heenes will appear Friday in Larimer County Court, where they are expected to plead guilty, their attorneys said. The penalty range for the felony is two to six years in prison with a fine of $2,000 to $500,000. The range for the misdemeanor would be six months in jail with a fine of $50 to $750, the district attorney's office statement said. The Heenes' attorneys said prosecutors have agreed to a sentence of probation with the possibility of up to 90 days in jail for Richard Heene and up to 60 days in jail for his wife. The threat of deportation for Mayumi Heene was a factor in the plea deal negotiation, the attorney's statement said. \"Mayumi Heene is a citizen of Japan. As such, any felony conviction or certain misdemeanors would result in her deportation, even though her husband and children are Americans,\" the statement said. \"It is supremely ironic that law enforcement has expressed such grave concern over the welfare of the children, but it was ultimately the threat of taking the children's mother from the family and deporting her to Japan which fueled this deal.\" Prosecutors in the case could not be immediately reached for comment. On October 15, a large silver balloon came loose from moorings in the Heenes' yard and floated over Colorado. Mayumi Heene called 911 and said the couple's 6-year-old son Falcon was inside the craft. Millions of people across the country watched the saga on television for nearly two hours as military aircraft tracked the balloon in the air and rescuers chased it on the ground. Mayumi Heene later admitted the whole thing was a hoax and that Falcon was safe in their home the whole time, authorities said. Watch the moment the hoax was revealed . 
Court documents released last month said the couple hatched the plan about two weeks before the incident and \"instructed their three children to lie to authorities as well as the media regarding this hoax.\" Their motive? To \"make the Heene family more marketable for future media interests,\" the documents said.\nTL;DR: \nROLE 1: Given the text: \u00a0The Real Richard Heene Fouted His Child Tortures Quest after the Hook On 12-Year-Old Falcon Given All The Facebook Psyops Dude Such A Lousy Repair Clean Site Nor Is It Monday Their War Over Entertainment Since the Travel Grab Is Triumphant By 2018\u00a0 Disney\nReconstruct the summarized text to the detailed:\nROLE 2: \u00a0Dick: \u00a0I wonder if they should implement \"bounty hunter\" laws if there isn't one to hopefully \"kill it like it counts for.\" A storm of accusations against the Disneyns and Cranky Publicns though...", "rollout/best_game/overall_score": -208.42775074454752, "rollout/best_game/accuracy (r2)": 0.032459869262685064, "_this_batch_num_rl_token": 9981, "num_rl_rollout": 4, "lm_epoch": 0, "rl_epoch": 0, "step": 58, "total_data_token": 137733, "total_rl_token": 160824, "total_lm_token": 94203, "total_token": 255027, "completed_steps": 58, "rollout/num_train_sample": 160, "timestamp": 1772000312, "checkpoint_dir_name": null, "done": false, "training_iteration": 5, "trial_id": "4849cf6b", "date": "2026-02-25_14-18-32", "time_this_iter_s": 19.625412702560425, "time_total_s": 166.69752836227417, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text 
to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 166.69752836227417, "iterations_since_restore": 5} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 407.28125, "rollout/std_q1_length": 207.5860137939453, "rollout/gen/avg_score": -380.6771240234375, "rollout/gen/std_score": 221.6674041748047, "rollout/gen/avg_r1_length": 50.8125, "rollout/gen/std_r1_length": 11.145482063293457, "rollout/gen/avg_r1_score": -130.11257934570312, "rollout/gen/std_r1_score": 52.19103240966797, "rollout/gen/avg_r1_accuracy": 0.12255966663360596, "rollout/gen/std_r1_accuracy": 0.03494023159146309, "rollout/gen/avg_r2_length": 386.25, "rollout/gen/std_r2_length": 160.9437713623047, "rollout/gen/avg_r2_score": -443.3182373046875, "rollout/gen/std_r2_score": 202.67791748046875, "rollout/gen/avg_r2_accuracy": 0.09721726179122925, "rollout/gen/std_r2_accuracy": 0.03931763023138046, "rollout/best_game/query_1": "The orange zinnia flowers can be eaten and were grown in the 'Veggie' laboratory, which was installed on the space station in 2014.\nGrowing their own food in space is a big step for astronauts because it means they could go on longer missions in the future.\nThey've grown lettuce before but the zinnia is the first to bloom flowers.\nIt shows that it may be possible to grow other flowering crops like tomatoes.\nGrowing plants in space might be trickier than you think - watering flowers is much more difficult in a place without gravity!\nAnother problem was that the high levels of moisture in the air inside the space station led to mould growing on some leaves.\nThe astronauts cleaned the plants, cut away the mouldy leaves, and set up a fan to try and dry out the crop. But the high speed fan did its job a bit too well, and the plants got too dry.\nAstronaut Scott Kelly took charge of the flowers, and worked hard to revive them over Christmas, and on 12 January, a few petals started to peak out.\nWhen the flowers finally bloomed, Scott shared the photo saying 'first ever flower grown in space makes its debut!'\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0The green zinnia flowers can be eaten.\nThe green goes really well for Space Quest's tablets Have you grown anywhere else based on food pairings you've worked with on Coruscant?\nStay tuned! 
\u2022[Jenny 199]\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " This is a cha cha, folk rendering which is an amazing explanation of humans and the world of Recoia itself, but I haven't found a valid source of this text.", "rollout/best_game/whole_game": "ROLE 0: The orange zinnia flowers can be eaten and were grown in the 'Veggie' laboratory, which was installed on the space station in 2014.\nGrowing their own food in space is a big step for astronauts because it means they could go on longer missions in the future.\nThey've grown lettuce before but the zinnia is the first to bloom flowers.\nIt shows that it may be possible to grow other flowering crops like tomatoes.\nGrowing plants in space might be trickier than you think - watering flowers is much more difficult in a place without gravity!\nAnother problem was that the high levels of moisture in the air inside the space station led to mould growing on some leaves.\nThe astronauts cleaned the plants, cut away the mouldy leaves, and set up a fan to try and dry out the crop. But the high speed fan did its job a bit too well, and the plants got too dry.\nAstronaut Scott Kelly took charge of the flowers, and worked hard to revive them over Christmas, and on 12 January, a few petals started to peak out.\nWhen the flowers finally bloomed, Scott shared the photo saying 'first ever flower grown in space makes its debut!'\nTL;DR: \nROLE 1: Given the text: \u00a0The green zinnia flowers can be eaten.\nThe green goes really well for Space Quest's tablets Have you grown anywhere else based on food pairings you've worked with on Coruscant?\nStay tuned! \u2022[Jenny 199]\nReconstruct the summarized text to the detailed:\nROLE 2: This is a cha cha, folk rendering which is an amazing explanation of humans and the world of Recoia itself, but I haven't found a valid source of this text.", "rollout/best_game/overall_score": -91.82396080571533, "rollout/best_game/accuracy (r2)": 0.05937032208218649, "_this_batch_num_rl_token": 13033, "num_rl_rollout": 5, "lm_epoch": 0, "rl_epoch": 0, "step": 77, "total_data_token": 180817, "total_rl_token": 214033, "total_lm_token": 124254, "total_token": 338287, "completed_steps": 77, "rollout/num_train_sample": 160, "timestamp": 1772000333, "checkpoint_dir_name": null, "done": false, "training_iteration": 6, "trial_id": "4849cf6b", "date": "2026-02-25_14-18-53", "time_this_iter_s": 20.557494401931763, "time_total_s": 187.25502276420593, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 
1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 187.25502276420593, "iterations_since_restore": 6} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 396.15625, "rollout/std_q1_length": 229.18820190429688, "rollout/gen/avg_score": -473.4935607910156, "rollout/gen/std_score": 791.6729736328125, "rollout/gen/avg_r1_length": 52.5, "rollout/gen/std_r1_length": 9.721940994262695, "rollout/gen/avg_r1_score": -134.97665405273438, "rollout/gen/std_r1_score": 47.41380310058594, "rollout/gen/avg_r1_accuracy": 0.11103230714797974, "rollout/gen/std_r1_accuracy": 0.033921390771865845, "rollout/gen/avg_r2_length": 408.84375, "rollout/gen/std_r2_length": 162.98414611816406, "rollout/gen/avg_r2_score": -558.122802734375, "rollout/gen/std_r2_score": 864.8844604492188, "rollout/gen/avg_r2_accuracy": 0.08677195757627487, "rollout/gen/std_r2_accuracy": 0.03898562863469124, "rollout/best_game/query_1": "(CNN) -- A man who pleaded guilty to perpetrating a hoax by saying his 6-year-old son was drifting over Colorado in a balloon began his jail sentence Monday. Richard Heene turned himself in Monday morning at the Larimer County Detention Center in Fort Collins, Colorado, said Dean Karges, a detention services specialist at the jail. Heene pleaded guilty in November to a felony count of attempting to influence a public servant. Prosecutors said he and his wife staged their son's disappearance to generate publicity for themselves because they wanted to star in a reality television show. 
A judge sentenced Heene to spend 90 days in custody. He will spend 30 days and nights in jail but is eligible for work release during the last 60 days of his sentence, said Linda Jensen, a spokeswoman for the district attorney. If he has a job during that time, he would get out of jail during the day and return to spend the night, she said. He also must serve four years on probation and complete 100 hours of community service each year. Heene told authorities in October that his son, Falcon, was drifting over eastern Colorado in a homemade balloon that had come loose from its moorings in the family's backyard. Authorities raced to track the balloon while puzzling over ways to rescue the boy inside. Their efforts received widespread news coverage that riveted people around the nation. \"This has gotten more media attention than any other case we've ever had here,\" Jensen said. When the balloon came to rest in a field, however, Heene's son was not inside. The boy later was found hiding in the family's house. That night, during an interview with CNN's \"Larry King Live,\" the boy gave authorities what Larimer County Sheriff Jim Alderden called an \"Aha! moment.\" \"You guys said we did this for the show,\" Falcon said in the interview after his father asked him why he had not come out from hiding when he heard his parents calling for him. Later in the interview the Heenes said their son was simply confused. Authorities said Heene's wife, Mayumi, later admitted the whole thing was a hoax and that Falcon was safe in their home the whole time. She pleaded guilty to a misdemeanor charge of false reporting to authorities. She was sentenced to 20 days in jail, but her term will begin after her husband's sentence ends so their children will have a parent able to care for them. Richard Heene recently maintained on CNN's \"Larry King Live\" that the incident was not part of a plan for fame. Alderden said Heene's claim \"shocked\" him. \"The evidence against Mr. Heene and Mayumi at this point is really overwhelming,\" the sheriff said. \"There is no doubt in my mind that this thing was a hoax, and I really doubt that there's very few people in America who don't understand at this point that this was an elaborate hoax perpetrated by Richard and Mayumi.\"\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Dennis and internet troll deleted part of IU/LA girl's blog to spur internet campaign\nRead or Share this story: http://ndnbc.co/1IMopNw\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \u00a0\nEXPOSITION OF HE FACTS.\n\u00a0 Special thanks to\u00a0 Phoho Priest Free cup\u00a0\u00a0for supporting children in school.", "rollout/best_game/whole_game": "ROLE 0: (CNN) -- A man who pleaded guilty to perpetrating a hoax by saying his 6-year-old son was drifting over Colorado in a balloon began his jail sentence Monday. Richard Heene turned himself in Monday morning at the Larimer County Detention Center in Fort Collins, Colorado, said Dean Karges, a detention services specialist at the jail. Heene pleaded guilty in November to a felony count of attempting to influence a public servant. Prosecutors said he and his wife staged their son's disappearance to generate publicity for themselves because they wanted to star in a reality television show. A judge sentenced Heene to spend 90 days in custody. 
He will spend 30 days and nights in jail but is eligible for work release during the last 60 days of his sentence, said Linda Jensen, a spokeswoman for the district attorney. If he has a job during that time, he would get out of jail during the day and return to spend the night, she said. He also must serve four years on probation and complete 100 hours of community service each year. Heene told authorities in October that his son, Falcon, was drifting over eastern Colorado in a homemade balloon that had come loose from its moorings in the family's backyard. Authorities raced to track the balloon while puzzling over ways to rescue the boy inside. Their efforts received widespread news coverage that riveted people around the nation. \"This has gotten more media attention than any other case we've ever had here,\" Jensen said. When the balloon came to rest in a field, however, Heene's son was not inside. The boy later was found hiding in the family's house. That night, during an interview with CNN's \"Larry King Live,\" the boy gave authorities what Larimer County Sheriff Jim Alderden called an \"Aha! moment.\" \"You guys said we did this for the show,\" Falcon said in the interview after his father asked him why he had not come out from hiding when he heard his parents calling for him. Later in the interview the Heenes said their son was simply confused. Authorities said Heene's wife, Mayumi, later admitted the whole thing was a hoax and that Falcon was safe in their home the whole time. She pleaded guilty to a misdemeanor charge of false reporting to authorities. She was sentenced to 20 days in jail, but her term will begin after her husband's sentence ends so their children will have a parent able to care for them. Richard Heene recently maintained on CNN's \"Larry King Live\" that the incident was not part of a plan for fame. Alderden said Heene's claim \"shocked\" him. \"The evidence against Mr. Heene and Mayumi at this point is really overwhelming,\" the sheriff said. 
\"There is no doubt in my mind that this thing was a hoax, and I really doubt that there's very few people in America who don't understand at this point that this was an elaborate hoax perpetrated by Richard and Mayumi.\"\nTL;DR: \nROLE 1: Given the text: \u00a0Dennis and internet troll deleted part of IU/LA girl's blog to spur internet campaign\nRead or Share this story: http://ndnbc.co/1IMopNw\nReconstruct the summarized text to the detailed:\nROLE 2: \u00a0\nEXPOSITION OF HE FACTS.\n\u00a0 Special thanks to\u00a0 Phoho Priest Free cup\u00a0\u00a0for supporting children in school.", "rollout/best_game/overall_score": -73.18585967105787, "rollout/best_game/accuracy (r2)": 0.01520912547528517, "_this_batch_num_rl_token": 12677, "num_rl_rollout": 6, "lm_epoch": 0, "rl_epoch": 0, "step": 96, "total_data_token": 223153, "total_rl_token": 265099, "total_lm_token": 153913, "total_token": 419012, "completed_steps": 96, "rollout/num_train_sample": 160, "timestamp": 1772000353, "checkpoint_dir_name": null, "done": false, "training_iteration": 7, "trial_id": "4849cf6b", "date": "2026-02-25_14-19-13", "time_this_iter_s": 20.38148522377014, "time_total_s": 207.63650798797607, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, 
"trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 207.63650798797607, "iterations_since_restore": 7} +{"rl_info/A2G": -1.1147022247314453, "rl_info/entropy": 3.0628790855407715, "rl_info/total_token": 2750.0, "rl_info/advantage_b4_norm": -563.918701171875, "rl_info/kl_w_ref": 0.0, "train/rl_loss": 111.46991729736328, "train/lm_loss": 6.745026588439941, "train/total_loss": 118.2149429321289, "gigaword/rouge1": 0.01588794208751672, "gigaword/rouge2": 0.0030682203925902537, "gigaword/rougeL": 0.015696270262763597, "gigaword/rougeLsum": 0.015056566514776864, "gigaword/bertscore_precision": 0.5697575241327286, "gigaword/bertscore_recall": 0.6501910880208015, "gigaword/bertscore_f1": 0.6064878015220165, "xsum/rouge1": 0.09303656213666846, "xsum/rouge2": 0.01669809664630185, "xsum/rougeL": 0.07840390008374683, "xsum/rougeLsum": 0.0756508330449655, "xsum/bertscore_precision": 0.6849921693404516, "xsum/bertscore_recall": 0.6992477228244146, "xsum/bertscore_f1": 0.6882759034633636, "samsum/rouge1": 0.06261417581415757, "samsum/rouge2": 0.014579625637229768, "samsum/rougeL": 0.05344327595304904, "samsum/rougeLsum": 0.04368849458568232, "samsum/bertscore_precision": 0.6274227847655615, "samsum/bertscore_recall": 0.6826510578393936, "samsum/bertscore_f1": 0.6529552042484283, "cnndm/rouge1": 0.16249304636580647, "cnndm/rouge2": 0.06028809360755266, "cnndm/rougeL": 0.13571205588993293, "cnndm/rougeLsum": 0.147324355852848, "cnndm/bertscore_precision": 0.7059448113044103, "cnndm/bertscore_recall": 0.7426582922538122, "cnndm/bertscore_f1": 0.7225645929574966, "eval_agg/avg_all_rougef": 0.0621025946797243, "eval_agg/avg_all_bertf": 0.6675708755478262, "eval_agg/avg_all": 0.3648367351137753, "num_rl_rollout": 6, "lm_epoch": 0, "rl_epoch": 0, "step": 100, "total_data_token": 230066, "total_rl_token": 277401, "total_lm_token": 160826, "total_token": 438227, "completed_steps": 100, "tune_objective": 0.8340927094839546, "timestamp": 1772000364, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 8, "trial_id": "4849cf6b", "date": "2026-02-25_14-19-24", "time_this_iter_s": 10.895561218261719, "time_total_s": 218.5320692062378, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": 
"/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 218.5320692062378, "iterations_since_restore": 8} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 337.1875, "rollout/std_q1_length": 236.70498657226562, "rollout/gen/avg_score": -709.9044189453125, "rollout/gen/std_score": 1688.951416015625, "rollout/gen/avg_r1_length": 44.21875, "rollout/gen/std_r1_length": 16.54852867126465, "rollout/gen/avg_r1_score": -108.00047302246094, "rollout/gen/std_r1_score": 64.93853759765625, "rollout/gen/avg_r1_accuracy": 0.11300493031740189, "rollout/gen/std_r1_accuracy": 0.04444945603609085, "rollout/gen/avg_r2_length": 408.3359375, "rollout/gen/std_r2_length": 156.6328887939453, "rollout/gen/avg_r2_score": -860.38037109375, "rollout/gen/std_r2_score": 1859.0791015625, "rollout/gen/avg_r2_accuracy": 0.08834926038980484, "rollout/gen/std_r2_accuracy": 0.04424477741122246, "rollout/best_game/query_1": "The answer, as you probably guessed, 
involves debating the Wales Bill, with Monday's committee stage ending at 11:12pm.\nThe Lords don't, by tradition, have votes during committee stages. Instead, amendments are introduced, moved and debated before being withdrawn after peers have had the chance to find out if ministers might change their mind later during the legislative process - or whether there is such a groundswell of support to change the Bill that the government might be defeated.\nPeers spent an hour of that time debating whether a referendum should be held before the Welsh Government gets the power to vary income tax rates.\nThe law, as it now stands, says there should be a plebiscite but the Wales Bill scraps that requirement, much to the annoyance of Welsh Labour grandees Lords Hain, Murphy and Kinnock.\nThe debate, perhaps inevitably, featured reflections, prompted by recent experience, on the recent nature of referendums. Lord Kinnock spoke of his own bruises and scars from the Brexit vote, but argued referendums were justifiable in a parliamentary democracy \"when there is a proposal to change the way in which we are governed\".\nHe argued that the proposal to give Wales tax-varying powers was made \"in the absolutely certain and cynical knowledge that it would not be exercised\" and that there \"was and is no evident support among the public for the idea of income tax-raising or income tax-varying powers to be allocated to the Welsh Assembly\".\nWales Office Minister Lord Bourne disagreed, telling peers: \"I strongly and sincerely believe that if we were to have a referendum, it would be carried\".\nFormer Labour MP Lord Howarth of Newport, suggested the Conservatives were \"sliding away\" from a clear manifesto commitment to only devolve tax powers after a referendum.\nThe Welsh Conservative manifesto referred to the issue in a passage on what politicians call \"fair funding\". It said: \"We will do this by putting in place a floor in the level of relative Welsh Government funding in the expectation that the Welsh Government will hold a referendum on income tax raising powers.\n\"We will bring greater financial accountability to the Welsh Assembly by ensuring that the Welsh Government becomes responsible for raising more of the money it spends.\"\nWhether the UK government is sliding away from that or not, Labour and Liberal Democrat frontbenchers support the scrapping of a referendum requirement and this part of the Bill looks unlikely to change.\nTheir lordships moved on (eventually) to discuss the list of powers to be kept at Westminster. Lord Bourne politely agreed to look at them.\nFormer Plaid Cymru leader Lord Wigley tabled an amendment designed to ensure the Welsh Government can continue to support the Welsh language and culture in Patagonia.\nLabour peer (and historian) Lord Morgan reflected that \"the Welsh community in Chubut rather benefited from the Falklands War because the Argentine government were rather anxious to show that they were solicitous of the needs of cultural minorities in their country\".\nLord Bourne said the Wales Bill would already allow the Welsh Government to continue its support.\nYou can read part one of the debate here and part two here. 
Peers return to the subject on Wednesday, November 16.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0A Welsh Party member entirely and wholly stands against the UK sending Europe while advocating an Anglo-American free-trade deal.\nThis post has been updated as on Wednesday evening.\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " Coordination of the Euro-Anglo-American Economic Leaders suggests a plausible trading arrangement. Background training of Assistant Market Regulator Michael Bathurst for Euro Coalition Funding. General plans for the GB boss Wavi Gindets for Euro Coalition Funding. Top Labour figures & short posts on the GB forum Gacta Sacha. September every Wednesday & New Year's Truce scrum. 20 generals also doing EPB Rac de France climate advisor The Green Party ministers Climate Secretary Dr Chris Grayling, Chancellor of the Exchequer Diane Abbott, Cabinet Secretary Deborah Hodge. SPT Staff in the ministries of agriculture & fisheries Rick Rogers , MPE, Cabinet Secretary Jo Clark Ellis, MEP Richard Salter, Cabinet Secretary Dean Baker and Defence Secretary David Fallon.\nPosted by Irene James at 4:24 PM", "rollout/best_game/whole_game": "ROLE 0: The answer, as you probably guessed, involves debating the Wales Bill, with Monday's committee stage ending at 11:12pm.\nThe Lords don't, by tradition, have votes during committee stages. Instead, amendments are introduced, moved and debated before being withdrawn after peers have had the chance to find out if ministers might change their mind later during the legislative process - or whether there is such a groundswell of support to change the Bill that the government might be defeated.\nPeers spent an hour of that time debating whether a referendum should be held before the Welsh Government gets the power to vary income tax rates.\nThe law, as it now stands, says there should be a plebiscite but the Wales Bill scraps that requirement, much to the annoyance of Welsh Labour grandees Lords Hain, Murphy and Kinnock.\nThe debate, perhaps inevitably, featured reflections, prompted by recent experience, on the recent nature of referendums. Lord Kinnock spoke of his own bruises and scars from the Brexit vote, but argued referendums were justifiable in a parliamentary democracy \"when there is a proposal to change the way in which we are governed\".\nHe argued that the proposal to give Wales tax-varying powers was made \"in the absolutely certain and cynical knowledge that it would not be exercised\" and that there \"was and is no evident support among the public for the idea of income tax-raising or income tax-varying powers to be allocated to the Welsh Assembly\".\nWales Office Minister Lord Bourne disagreed, telling peers: \"I strongly and sincerely believe that if we were to have a referendum, it would be carried\".\nFormer Labour MP Lord Howarth of Newport, suggested the Conservatives were \"sliding away\" from a clear manifesto commitment to only devolve tax powers after a referendum.\nThe Welsh Conservative manifesto referred to the issue in a passage on what politicians call \"fair funding\". 
It said: \"We will do this by putting in place a floor in the level of relative Welsh Government funding in the expectation that the Welsh Government will hold a referendum on income tax raising powers.\n\"We will bring greater financial accountability to the Welsh Assembly by ensuring that the Welsh Government becomes responsible for raising more of the money it spends.\"\nWhether the UK government is sliding away from that or not, Labour and Liberal Democrat frontbenchers support the scrapping of a referendum requirement and this part of the Bill looks unlikely to change.\nTheir lordships moved on (eventually) to discuss the list of powers to be kept at Westminster. Lord Bourne politely agreed to look at them.\nFormer Plaid Cymru leader Lord Wigley tabled an amendment designed to ensure the Welsh Government can continue to support the Welsh language and culture in Patagonia.\nLabour peer (and historian) Lord Morgan reflected that \"the Welsh community in Chubut rather benefited from the Falklands War because the Argentine government were rather anxious to show that they were solicitous of the needs of cultural minorities in their country\".\nLord Bourne said the Wales Bill would already allow the Welsh Government to continue its support.\nYou can read part one of the debate here and part two here. Peers return to the subject on Wednesday, November 16.\nTL;DR: \nROLE 1: Given the text: \u00a0A Welsh Party member entirely and wholly stands against the UK sending Europe while advocating an Anglo-American free-trade deal.\nThis post has been updated as on Wednesday evening.\nReconstruct the summarized text to the detailed:\nROLE 2: Coordination of the Euro-Anglo-American Economic Leaders suggests a plausible trading arrangement. Background training of Assistant Market Regulator Michael Bathurst for Euro Coalition Funding. General plans for the GB boss Wavi Gindets for Euro Coalition Funding. Top Labour figures & short posts on the GB forum Gacta Sacha. September every Wednesday & New Year's Truce scrum. 20 generals also doing EPB Rac de France climate advisor The Green Party ministers Climate Secretary Dr Chris Grayling, Chancellor of the Exchequer Diane Abbott, Cabinet Secretary Deborah Hodge. 
SPT Staff in the ministries of agriculture & fisheries Rick Rogers , MPE, Cabinet Secretary Jo Clark Ellis, MEP Richard Salter, Cabinet Secretary Dean Baker and Defence Secretary David Fallon.\nPosted by Irene James at 4:24 PM", "rollout/best_game/overall_score": -202.98537803251656, "rollout/best_game/accuracy (r2)": 0.05052040158423137, "_this_batch_num_rl_token": 10790, "num_rl_rollout": 7, "lm_epoch": 0, "rl_epoch": 0, "step": 116, "total_data_token": 260080, "total_rl_token": 319111, "total_lm_token": 180050, "total_token": 499161, "completed_steps": 116, "rollout/num_train_sample": 160, "timestamp": 1772000383, "checkpoint_dir_name": null, "done": false, "training_iteration": 9, "trial_id": "4849cf6b", "date": "2026-02-25_14-19-43", "time_this_iter_s": 19.373114347457886, "time_total_s": 237.90518355369568, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": 
["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 237.90518355369568, "iterations_since_restore": 9} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 355.40625, "rollout/std_q1_length": 244.96371459960938, "rollout/gen/avg_score": -584.4690551757812, "rollout/gen/std_score": 1324.5087890625, "rollout/gen/avg_r1_length": 49.40625, "rollout/gen/std_r1_length": 14.368108749389648, "rollout/gen/avg_r1_score": -128.7856903076172, "rollout/gen/std_r1_score": 55.14036560058594, "rollout/gen/avg_r1_accuracy": 0.1094593033194542, "rollout/gen/std_r1_accuracy": 0.033135026693344116, "rollout/gen/avg_r2_length": 401.4921875, "rollout/gen/std_r2_length": 158.7380828857422, "rollout/gen/avg_r2_score": -698.3898315429688, "rollout/gen/std_r2_score": 1459.525390625, "rollout/gen/avg_r2_accuracy": 0.08660686016082764, "rollout/gen/std_r2_accuracy": 0.035699211061000824, "rollout/best_game/query_1": "By . Paul Donnelley for MailOnline . California's attorney general said yesterday that she will appeal a federal court ruling that called the state's death penalty unconstitutional. The announcement by Attorney General Kamala Harris came after District Judge Cormac Carney in Los Angeles ruled last month that the state's death penalty takes too long to carry out, and that the unpredictable delays are arbitrary and unfair. Death penalty opponents have long argued that California's delays amounted to cruel and unusual punishment, which is unconstitutional. Until Judge Carney's ruling, the argument failed to persuade a judge. California Attorney General Kamala Harris is appealing a federal court ruling that called the state's death penalty unconstitutional . Harris, however, said the amount of time it takes to execute inmates in California ensures they receive due process. 'I am appealing the court's decision because it is not supported by the law, and it undermines important protections that our courts provide to defendants,' Attorney General Harris said in a prepared statement. 'This flawed ruling requires appellate review.' Capital punishment opponents had called on Attorney General Harris to let Judge Carney's ruling stand rather than risk it being overturned in the 9th US Circuit Court of Appeals. 'We hope the 9th Circuit will recognise that California's death penalty system is as broken and unconstitutional as Judge Cormac found,' Matt Cherry, the executive director of Death Penalty Focus, which seeks to abolish capital punishment, said in response to Harris's move. Death penalty backers supported Attorney General Harris's decision. 'It is obviously the correct decision to make,' said Kent Scheidegger, the top lawyer at the pro-death penalty Criminal Justice Legal Foundation in Sacramento. Judge Carney's ruling overturned the death sentence of Ernest Dewayne Jones (above), a Los Angeles man sentenced to die for the 1992 rape and murder of his girlfriend's mother . Mr Scheidegger was attending a death penalty conference for government lawyers in San Diego and said that the initial ruling by Judge Carney 'has been the talk in the hallways' among attendees. 
The San Francisco-based 9th Circuit is often viewed as a liberal-leaning court, but the three-judge panel that will consider the appeal by Harris will be randomly selected from the entire court of more than two dozen judges of varying political pedigrees. 'You never know what you're going to get,' Mr Scheidegger said of the 9th Circuit's three-judge panels. Attorney General Harris has said she personally opposes the death penalty but promised voters she would enforce state law. Judge Carney's ruling overturned the death sentence of Ernest Dewayne Jones, a Los Angeles man sentenced to die for the 1992 rape and murder of his girlfriend's mother. Since the current death penalty system was adopted 35 years ago, the judge noted, more than 900 people have been sentenced to death but only 13 have been executed. No executions have been carried out in California since 2006 after another federal judge ordered an overhaul of the state's lethal injection procedures . The judge called the death penalty an empty promise that violates the Eighth Amendment's protection against cruel and unusual punishment. 'Inordinate and unpredictable delay has resulted in a death penalty system in which very few of the hundreds of individuals sentenced to death have been, or even will be, executed by the state,' wrote Judge Carney, a George W. Bush appointee. He noted that death penalty appeals can last decades and, as a result, most condemned inmates are likely to die of natural causes before their executions are carried out. No executions have been carried out in California since 2006 after another federal judge ordered an overhaul of the state's lethal injection procedures. In addition, the Department of Corrections and Rehabilitation is drafting new lethal injection regulations after Governor Jerry Brown said the state would switch from a three-drug cocktail to a single-drug lethal injection. No executions can occur until the new rules are adopted.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Last Review : Closing Amendment 61 brosmack and ex ante it \u00a0Type: Legal system types: power mag indent 2years2year : 1 and 10 2(g) apply to the original term \u00a04year : No when affirming an appealing Dr\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " If you are a lawyer, this application is particularly important, because the writ does not apply in all of these cases. \u00a0When the court decided that pupils are not correctly represented by the ABMJ, a second application could be required. However, each of these were unsuccessful; as the tally shows:", "rollout/best_game/whole_game": "ROLE 0: By . Paul Donnelley for MailOnline . California's attorney general said yesterday that she will appeal a federal court ruling that called the state's death penalty unconstitutional. The announcement by Attorney General Kamala Harris came after District Judge Cormac Carney in Los Angeles ruled last month that the state's death penalty takes too long to carry out, and that the unpredictable delays are arbitrary and unfair. Death penalty opponents have long argued that California's delays amounted to cruel and unusual punishment, which is unconstitutional. Until Judge Carney's ruling, the argument failed to persuade a judge. California Attorney General Kamala Harris is appealing a federal court ruling that called the state's death penalty unconstitutional . Harris, however, said the amount of time it takes to execute inmates in California ensures they receive due process. 
'I am appealing the court's decision because it is not supported by the law, and it undermines important protections that our courts provide to defendants,' Attorney General Harris said in a prepared statement. 'This flawed ruling requires appellate review.' Capital punishment opponents had called on Attorney General Harris to let Judge Carney's ruling stand rather than risk it being overturned in the 9th US Circuit Court of Appeals. 'We hope the 9th Circuit will recognise that California's death penalty system is as broken and unconstitutional as Judge Cormac found,' Matt Cherry, the executive director of Death Penalty Focus, which seeks to abolish capital punishment, said in response to Harris's move. Death penalty backers supported Attorney General Harris's decision. 'It is obviously the correct decision to make,' said Kent Scheidegger, the top lawyer at the pro-death penalty Criminal Justice Legal Foundation in Sacramento. Judge Carney's ruling overturned the death sentence of Ernest Dewayne Jones (above), a Los Angeles man sentenced to die for the 1992 rape and murder of his girlfriend's mother . Mr Scheidegger was attending a death penalty conference for government lawyers in San Diego and said that the initial ruling by Judge Carney 'has been the talk in the hallways' among attendees. The San Francisco-based 9th Circuit is often viewed as a liberal-leaning court, but the three-judge panel that will consider the appeal by Harris will be randomly selected from the entire court of more than two dozen judges of varying political pedigrees. 'You never know what you're going to get,' Mr Scheidegger said of the 9th Circuit's three-judge panels. Attorney General Harris has said she personally opposes the death penalty but promised voters she would enforce state law. Judge Carney's ruling overturned the death sentence of Ernest Dewayne Jones, a Los Angeles man sentenced to die for the 1992 rape and murder of his girlfriend's mother. Since the current death penalty system was adopted 35 years ago, the judge noted, more than 900 people have been sentenced to death but only 13 have been executed. No executions have been carried out in California since 2006 after another federal judge ordered an overhaul of the state's lethal injection procedures . The judge called the death penalty an empty promise that violates the Eighth Amendment's protection against cruel and unusual punishment. 'Inordinate and unpredictable delay has resulted in a death penalty system in which very few of the hundreds of individuals sentenced to death have been, or even will be, executed by the state,' wrote Judge Carney, a George W. Bush appointee. He noted that death penalty appeals can last decades and, as a result, most condemned inmates are likely to die of natural causes before their executions are carried out. No executions have been carried out in California since 2006 after another federal judge ordered an overhaul of the state's lethal injection procedures. In addition, the Department of Corrections and Rehabilitation is drafting new lethal injection regulations after Governor Jerry Brown said the state would switch from a three-drug cocktail to a single-drug lethal injection. 
No executions can occur until the new rules are adopted.\nTL;DR: \nROLE 1: Given the text: \u00a0Last Review : Closing Amendment 61 brosmack and ex ante it \u00a0Type: Legal system types: power mag indent 2years2year : 1 and 10 2(g) apply to the original term \u00a04year : No when affirming an appealing Dr\nReconstruct the summarized text to the detailed:\nROLE 2: If you are a lawyer, this application is particularly important, because the writ does not apply in all of these cases. \u00a0When the court decided that pupils are not correctly represented by the ABMJ, a second application could be required. However, each of these were unsuccessful; as the tally shows:", "rollout/best_game/overall_score": -221.2261609406865, "rollout/best_game/accuracy (r2)": 0.03970965425259054, "_this_batch_num_rl_token": 11373, "num_rl_rollout": 8, "lm_epoch": 0, "rl_epoch": 0, "step": 135, "total_data_token": 305025, "total_rl_token": 372793, "total_lm_token": 213622, "total_token": 586415, "completed_steps": 135, "rollout/num_train_sample": 160, "timestamp": 1772000404, "checkpoint_dir_name": null, "done": false, "training_iteration": 10, "trial_id": "4849cf6b", "date": "2026-02-25_14-20-04", "time_this_iter_s": 21.085517406463623, "time_total_s": 258.9907009601593, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, 
"test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 258.9907009601593, "iterations_since_restore": 10} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 380.09375, "rollout/std_q1_length": 261.0609130859375, "rollout/gen/avg_score": -461.421630859375, "rollout/gen/std_score": 794.3045043945312, "rollout/gen/avg_r1_length": 47.09375, "rollout/gen/std_r1_length": 15.775357246398926, "rollout/gen/avg_r1_score": -120.2005844116211, "rollout/gen/std_r1_score": 60.531185150146484, "rollout/gen/avg_r1_accuracy": 0.11411139369010925, "rollout/gen/std_r1_accuracy": 0.03983970731496811, "rollout/gen/avg_r2_length": 399.9609375, "rollout/gen/std_r2_length": 167.65330505371094, "rollout/gen/avg_r2_score": -546.7269287109375, "rollout/gen/std_r2_score": 867.3671264648438, "rollout/gen/avg_r2_accuracy": 0.08793585002422333, "rollout/gen/std_r2_accuracy": 0.0411004014313221, "rollout/best_game/query_1": "The Sportscotland Avalanche Information Service (SAIS) has been providing the service for several years.\nIt assesses avalanche hazard in six areas - Lochaber, Glen Coe, Creag Meagaidh, Southern Cairngorms , Northern Cairngorms and Torridon.\nThe reports are usually issued until mid-April.\nMembers of the service also regularly blog on conditions in the hills and mountains in these areas.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Normal snow\nArctic 100% Lawless FogZone: No Symbols\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " rsvtr: The Delete! command might be useful to help rapid-start the process of rebuilding or refit some of the files you need, fixing that missing gls file or in the case of glsl.\nI hope this helps you understand the development of those models.", "rollout/best_game/whole_game": "ROLE 0: The Sportscotland Avalanche Information Service (SAIS) has been providing the service for several years.\nIt assesses avalanche hazard in six areas - Lochaber, Glen Coe, Creag Meagaidh, Southern Cairngorms , Northern Cairngorms and Torridon.\nThe reports are usually issued until mid-April.\nMembers of the service also regularly blog on conditions in the hills and mountains in these areas.\nTL;DR: \nROLE 1: Given the text: \u00a0Normal snow\nArctic 100% Lawless FogZone: No Symbols\nReconstruct the summarized text to the detailed:\nROLE 2: rsvtr: The Delete! 
command might be useful to help rapid-start the process of rebuilding or refit some of the files you need, fixing that missing gls file or in the case of glsl.\nI hope this helps you understand the development of those models.", "rollout/best_game/overall_score": -78.31972460287074, "rollout/best_game/accuracy (r2)": 0.09830065359477125, "_this_batch_num_rl_token": 12163, "num_rl_rollout": 9, "lm_epoch": 0, "rl_epoch": 0, "step": 154, "total_data_token": 344315, "total_rl_token": 425765, "total_lm_token": 240749, "total_token": 666514, "completed_steps": 154, "rollout/num_train_sample": 160, "timestamp": 1772000426, "checkpoint_dir_name": null, "done": false, "training_iteration": 11, "trial_id": "4849cf6b", "date": "2026-02-25_14-20-26", "time_this_iter_s": 21.165728092193604, "time_total_s": 280.1564290523529, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": 
["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 280.1564290523529, "iterations_since_restore": 11} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 346.65625, "rollout/std_q1_length": 187.431640625, "rollout/gen/avg_score": -408.055419921875, "rollout/gen/std_score": 235.26333618164062, "rollout/gen/avg_r1_length": 46.71875, "rollout/gen/std_r1_length": 14.551874160766602, "rollout/gen/avg_r1_score": -107.30181884765625, "rollout/gen/std_r1_score": 61.962921142578125, "rollout/gen/avg_r1_accuracy": 0.11443158984184265, "rollout/gen/std_r1_accuracy": 0.03059418685734272, "rollout/gen/avg_r2_length": 412.7578125, "rollout/gen/std_r2_length": 157.9619140625, "rollout/gen/avg_r2_score": -483.24383544921875, "rollout/gen/std_r2_score": 199.67166137695312, "rollout/gen/avg_r2_accuracy": 0.09222357720136642, "rollout/gen/std_r2_accuracy": 0.03707268089056015, "rollout/best_game/query_1": "Australia Post has launched a new service that gives local shoppers a U.S. postal address, so they can shop at websites that offer cheaper prices but only deliver within America. The postal service has set up a warehouse in Oregon that gives Australian consumers a U.S. address to tell online retailers to ship to,\u00a0before their parcels are forwarded to their Australian address. The company initiated its ShopMate service last week in order to capitalise on the Christmas gift-buying rush. Scroll down for video . Australia Post launched ShopMate service for online shoppers last week in time for Christmas gift buying . A review of clothing items done by consumer group Choice this year found Australians are paying up to 60 percent more than U.S. consumers for exactly the same products. 'Whether it's IT products, cosmetics or clothes, it\u2019s never been easier for Aussie consumers to compare prices between Australia and overseas stores and it's not pretty viewing,' said Choice spokesman Tom Godfrey. Parcel Post general manager Kelly Heintz says the service will cut the annoying price difference suffered by Australian consumers. Ms Heintz says the service also means Australians won't have to wait any longer than Americans for products that haven't yet hit local stores. ShopMate isn't the first service of its kind offered in Australia, but it's the first that won't rely on international couriers that can sometimes be more costly. Subscribers can have their parcels sent to their 'US address' before they're forwarded to Australia . Australia Post charges a base rate of $24.95 a parcel, with a weight rate of $5.95 per 500 grams. Mr Godfrey said Choice welcomed Australia Post's decision, but said the lengths consumers have to go to avoid being ripped off were 'staggering'. 'The fact that Australia Post can set up a service like this to help people avoid geo-blocking is testament to how broken online shopping is in Australia,'\u00a0Mr Godfrey said. 'It's time to end digital discrimination down under and stop this farce that sees consumers parcelling up goods overseas to access affordable prices.' 
Rolling hills and road near Ashland, Oregon.\u00a0The postal service set up a warehouse in Oregon that essentially gives Australian consumers a US address .\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0everytime someone pays $160 for a 50 per on-demand parcel delivered to their door, that's fraud. Your postal address doesn't mean a stranger will be here to drag you down 'til \u00a31,000 to get you elsewhere. Fairfax Media\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " FCAP \u00a315 \u00a320 one night after Christmas 1927 - $140 a afternoon atop the fare for your next secure storage check? \u00a0Those 30 one-hour walk-in bins owner keeps mint counts past \u00a330 an hour.", "rollout/best_game/whole_game": "ROLE 0: Australia Post has launched a new service that gives local shoppers a U.S. postal address, so they can shop at websites that offer cheaper prices but only deliver within America. The postal service has set up a warehouse in Oregon that gives Australian consumers a U.S. address to tell online retailers to ship to,\u00a0before their parcels are forwarded to their Australian address. The company initiated its ShopMate service last week in order to capitalise on the Christmas gift-buying rush. Scroll down for video . Australia Post launched ShopMate service for online shoppers last week in time for Christmas gift buying . A review of clothing items done by consumer group Choice this year found Australians are paying up to 60 percent more than U.S. consumers for exactly the same products. 'Whether it's IT products, cosmetics or clothes, it\u2019s never been easier for Aussie consumers to compare prices between Australia and overseas stores and it's not pretty viewing,' said Choice spokesman Tom Godfrey. Parcel Post general manager Kelly Heintz says the service will cut the annoying price difference suffered by Australian consumers. Ms Heintz says the service also means Australians won't have to wait any longer than Americans for products that haven't yet hit local stores. ShopMate isn't the first service of its kind offered in Australia, but it's the first that won't rely on international couriers that can sometimes be more costly. Subscribers can have their parcels sent to their 'US address' before they're forwarded to Australia . Australia Post charges a base rate of $24.95 a parcel, with a weight rate of $5.95 per 500 grams. Mr Godfrey said Choice welcomed Australia Post's decision, but said the lengths consumers have to go to avoid being ripped off were 'staggering'. 'The fact that Australia Post can set up a service like this to help people avoid geo-blocking is testament to how broken online shopping is in Australia,'\u00a0Mr Godfrey said. 'It's time to end digital discrimination down under and stop this farce that sees consumers parcelling up goods overseas to access affordable prices.' Rolling hills and road near Ashland, Oregon.\u00a0The postal service set up a warehouse in Oregon that essentially gives Australian consumers a US address .\nTL;DR: \nROLE 1: Given the text: \u00a0everytime someone pays $160 for a 50 per on-demand parcel delivered to their door, that's fraud. Your postal address doesn't mean a stranger will be here to drag you down 'til \u00a31,000 to get you elsewhere. Fairfax Media\nReconstruct the summarized text to the detailed:\nROLE 2: FCAP \u00a315 \u00a320 one night after Christmas 1927 - $140 a afternoon atop the fare for your next secure storage check? 
\u00a0Those 30 one-hour walk-in bins owner keeps mint counts past \u00a330 an hour.", "rollout/best_game/overall_score": -105.13836144763887, "rollout/best_game/accuracy (r2)": 0.01594896331738437, "_this_batch_num_rl_token": 11093, "num_rl_rollout": 10, "lm_epoch": 0, "rl_epoch": 0, "step": 174, "total_data_token": 385767, "total_rl_token": 478467, "total_lm_token": 271108, "total_token": 749575, "completed_steps": 174, "rollout/num_train_sample": 160, "timestamp": 1772000447, "checkpoint_dir_name": null, "done": false, "training_iteration": 12, "trial_id": "4849cf6b", "date": "2026-02-25_14-20-47", "time_this_iter_s": 21.01446533203125, "time_total_s": 301.17089438438416, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", 
"xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 301.17089438438416, "iterations_since_restore": 12} +{"rollout/num_samples": 160, "rollout/avg_q1_length": 313.6875, "rollout/std_q1_length": 212.19488525390625, "rollout/gen/avg_score": -507.5830078125, "rollout/gen/std_score": 1096.676025390625, "rollout/gen/avg_r1_length": 50.5, "rollout/gen/std_r1_length": 11.864830017089844, "rollout/gen/avg_r1_score": -120.52194213867188, "rollout/gen/std_r1_score": 55.98723220825195, "rollout/gen/avg_r1_accuracy": 0.1082703173160553, "rollout/gen/std_r1_accuracy": 0.0393538735806942, "rollout/gen/avg_r2_length": 386.0859375, "rollout/gen/std_r2_length": 176.6576385498047, "rollout/gen/avg_r2_score": -604.3482666015625, "rollout/gen/std_r2_score": 1207.389404296875, "rollout/gen/avg_r2_accuracy": 0.08306202292442322, "rollout/gen/std_r2_accuracy": 0.04260895773768425, "rollout/best_game/query_1": "Vincent: So...\nErica: So... ?\nVincent: Dinner? ;)\nErica: With you? Always hon :D\nVincent: :*\nErica: Where are you thinking of going?\nVincent: Oh, I was thinking Rosita's, unless you have a better idea ;)\nErica: Rosita's?!! How?? \nErica: That place is notoriously difficult to get into ever since it opened!\nVincent: Good thing I have two reservations then :D\nErica: ...I love you.\nVincent: I love you too :) I'll see you tonight x\nErica: Wait, what time are you picking me up??\nVincent: 7pm okay? I have the reservations for 8pm, so we should make it :)\nErica: Make it 7:15 and you've got yourself a deal!\nVincent: Okay, love you and see you later xxx\nErica: Can't wait!!! <3 \nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0San Francisco or maybe Baltimore here ? \nVP Tonys tsvock\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \u00a0subsection: \"Drowned with the raw seeds of Baldur's Gate 2 and yet it does the unthinkable.\"\nRemember it.\nBeneath it, Koth knows.\nPhoto by astarradocious2014", "rollout/best_game/whole_game": "ROLE 0: Vincent: So...\nErica: So... ?\nVincent: Dinner? ;)\nErica: With you? Always hon :D\nVincent: :*\nErica: Where are you thinking of going?\nVincent: Oh, I was thinking Rosita's, unless you have a better idea ;)\nErica: Rosita's?!! How?? \nErica: That place is notoriously difficult to get into ever since it opened!\nVincent: Good thing I have two reservations then :D\nErica: ...I love you.\nVincent: I love you too :) I'll see you tonight x\nErica: Wait, what time are you picking me up??\nVincent: 7pm okay? I have the reservations for 8pm, so we should make it :)\nErica: Make it 7:15 and you've got yourself a deal!\nVincent: Okay, love you and see you later xxx\nErica: Can't wait!!! <3 \nTL;DR: \nROLE 1: Given the text: \u00a0San Francisco or maybe Baltimore here ? 
\nVP Tonys tsvock\nReconstruct the summarized text to the detailed:\nROLE 2: \u00a0subsection: \"Drowned with the raw seeds of Baldur's Gate 2 and yet it does the unthinkable.\"\nRemember it.\nBeneath it, Koth knows.\nPhoto by astarradocious2014", "rollout/best_game/overall_score": -68.70136121295141, "rollout/best_game/accuracy (r2)": 0.06711409395973154, "_this_batch_num_rl_token": 10038, "num_rl_rollout": 11, "lm_epoch": 0, "rl_epoch": 0, "step": 193, "total_data_token": 425949, "total_rl_token": 532795, "total_lm_token": 301252, "total_token": 834047, "completed_steps": 193, "rollout/num_train_sample": 160, "timestamp": 1772000467, "checkpoint_dir_name": null, "done": false, "training_iteration": 13, "trial_id": "4849cf6b", "date": "2026-02-25_14-21-07", "time_this_iter_s": 20.381215810775757, "time_total_s": 321.5521101951599, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", 
"tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 321.5521101951599, "iterations_since_restore": 13} +{"rl_info/A2G": -0.37524792551994324, "rl_info/entropy": 3.0526039600372314, "rl_info/total_token": 2558.0, "rl_info/advantage_b4_norm": -546.3909912109375, "rl_info/kl_w_ref": 0.0, "train/rl_loss": 37.52448654174805, "train/lm_loss": 7.563488006591797, "train/total_loss": 45.087974548339844, "gigaword/rouge1": 0.01667789707686308, "gigaword/rouge2": 0.0029983382807010034, "gigaword/rougeL": 0.0164463352045245, "gigaword/rougeLsum": 0.015819796225858516, "gigaword/bertscore_precision": 0.5691623140871525, "gigaword/bertscore_recall": 0.6494210176169872, "gigaword/bertscore_f1": 0.6057747349143028, "xsum/rouge1": 0.10789241381461874, "xsum/rouge2": 0.017411917650488173, "xsum/rougeL": 0.08161228007379971, "xsum/rougeLsum": 0.07600640685007402, "xsum/bertscore_precision": 0.7069710244735082, "xsum/bertscore_recall": 0.7182930807272593, "xsum/bertscore_f1": 0.709260513385137, "samsum/rouge1": 0.06389163522359607, "samsum/rouge2": 0.013837504733094669, "samsum/rougeL": 0.05758906202585737, "samsum/rougeLsum": 0.04685234171116379, "samsum/bertscore_precision": 0.6321228841940562, "samsum/bertscore_recall": 0.6904535293579102, "samsum/bertscore_f1": 0.6595110396544138, "cnndm/rouge1": 0.11348436385845954, "cnndm/rouge2": 0.036006484648236046, "cnndm/rougeL": 0.10121169939977089, "cnndm/rougeLsum": 0.1025246204723157, "cnndm/bertscore_precision": 0.6778418372074763, "cnndm/bertscore_recall": 0.7278565466403961, "cnndm/bertscore_f1": 0.7001827309528986, "eval_agg/avg_all_rougef": 0.054391443578088866, "eval_agg/avg_all_bertf": 0.668682254726688, "eval_agg/avg_all": 0.36153684915238843, "num_rl_rollout": 11, "lm_epoch": 0, "rl_epoch": 0, "step": 200, "total_data_token": 437934, "total_rl_token": 552226, "total_lm_token": 313237, "total_token": 865463, "completed_steps": 200, "tune_objective": 0.8025069838947868, "timestamp": 1772000479, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": true, "training_iteration": 14, "trial_id": "4849cf6b", "date": "2026-02-25_14-21-19", "time_this_iter_s": 12.30050778388977, "time_total_s": 333.8526179790497, "pid": 2003771, "hostname": "albert", "node_ip": "10.2.1.37", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, 
"gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250}, "rollout_config": {"accuracy_w": 0.5456347144827906, "len_pen": 1.0, "accuracy_w2": 2.5739712587324886, "len_pen2": 1.0, "threshold": 0.0010903843281968525, "similarity_fn": "rouge", "max_ctx_len": 1000, "sampling_params_1": {"n": 1, "min_tokens": 8, "max_tokens": 56, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 20, "max_tokens": 512, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 333.8526179790497, "iterations_since_restore": 14}