| {"cnndm/rouge1": 0.19341703499464952, "cnndm/rouge2": 0.07021751542927328, "cnndm/rougeL": 0.15840943412501923, "cnndm/rougeLsum": 0.1640181196713419, "cnndm/bertscore_precision": 0.7042360544204712, "cnndm/bertscore_recall": 0.7557522475719451, "cnndm/bertscore_f1": 0.7285408675670624, "eval_agg/avg_all_rougef": 0.14651552605507098, "eval_agg/avg_all_bertf": 0.7285408675670624, "eval_agg/avg_all": 0.4375281968110667, "num_rl_rollout": 0, "lm_epoch": 0, "rl_epoch": 0, "step": 0, "total_data_token": 0, "total_rl_token": 0, "total_lm_token": 0, "total_token": 0, "completed_steps": 0, "tune_objective": 0.5159872827449616, "timestamp": 1772075054, "checkpoint_dir_name": null, "done": false, "training_iteration": 1, "trial_id": "284c96d2", "date": "2026-02-26_11-04-14", "time_this_iter_s": 48.814223289489746, "time_total_s": 48.814223289489746, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": 
"off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 48.814223289489746, "iterations_since_restore": 1} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 716.9375, "rollout/std_q1_length": 187.89649963378906, "rollout/gen/avg_score": -1055.2186279296875, "rollout/gen/std_score": 1953.6004638671875, "rollout/gen/avg_r1_length": 121.65625, "rollout/gen/std_r1_length": 28.725181579589844, "rollout/gen/avg_r1_score": -200.4755096435547, "rollout/gen/std_r1_score": 69.4067611694336, "rollout/gen/avg_r1_accuracy": 0.1441212296485901, "rollout/gen/std_r1_accuracy": 0.019349489361047745, "rollout/gen/avg_r2_length": 624.5859375, "rollout/gen/std_r2_length": 317.8156433105469, "rollout/gen/avg_r2_score": -1268.9044189453125, "rollout/gen/std_r2_score": 2132.351806640625, "rollout/gen/avg_r2_accuracy": 0.11019843816757202, "rollout/gen/std_r2_accuracy": 0.04400750994682312, "rollout/best_game/query_1": "Atlanta (CNN) -- Architect Ryan Gravel has lived in Atlanta, Georgia for more than 20 years, watching the city grow as it strives to compete among the world's finest. \"Great cities are great places to live and they're really vibrant and diverse,\" Gravel said. \"Atlanta has a lot of those qualities, but it really needs to be sort of kick-started and prepare for the future.\" With a growth in suburban sprawl in recent decades, Atlanta has acquired a reputation for unbearable traffic and poor public transportation. But a project originally inspired by Gravel's 1999 graduate thesis is helping to change that perception, tapping into Atlanta's potential and steering it onto a more sustainable path for the 21st century. The Atlanta BeltLine is revitalizing a 35-kilometer (22-mile) loop of abandoned railway line, transforming the land and adjacent brownfield sites into attractive new public spaces. With walking trails, cycle paths and parks dotted along the route, the project is helping connect 45 city neighborhoods. When it is completed, the BeltLine will have regenerated around 3,000 acres of land. See Also: LA's green city challenge . 
Construction began in 2007, with the first trail (the West End) opening the following year. A Northside trail has since opened, with another, the Eastside, due to open this summer. Along the route, residents can stop off in one of four parks, including D.H. Stanton Park (Atlanta's first energy-neutral park) and the Historic Fourth Ward Park, which has transformed 17 acres of wasteland into a \"glistening oasis\" according to developers. And thanks, in part, to a $25,000 donation from famed skateboarder Tony Hawk, the BeltLine has also created Atlanta's first public skatepark. Atlanta was losing population in the 1970s and 1980s Gravel says, but now the urban core is growing faster than most of the suburban counties. \"You've got thousands of people moving back into the city and that creates a lot of opportunities to leverage that growth to create the kind of place that we all want to live,\" he said. \"If we want to live compactly, sustainably and transit oriented, the BeltLine and that growth presents the opportunity to create that kind of place,\" Gravel added. \"Green space is kind of the living room of the city and as more people move into the city it becomes more and more important to have spaces where people can go get out of their homes and apartments and enjoy the diversity and life of the city.\" Major work still needs to be completed and local voters will decide in July whether to raise taxes to pay for the work and a lengthy list of other transportation projects. All the time and money being plowed into updating Atlanta's infrastructure proves that the city is \"growing up,\" Gravel says. \"We're figuring out who we are and what we want to be. And there's a lot of opportunities. The Olympics were a big one, part of that. 
The BeltLine is the next big major move that starts to redefine us and create who we are going to be when we grow up.\"\nTL;DR: ", "rollout/best_game/query_2": "Given the text: irl@firstvisualproperty FLDT development by Joshua Krese Images: irl@firstvisualproperty.com & Stefan Boshiol Instagram: stevenjkamp Image Gallery: skecopy\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " the first eye emerging from the goat. Notice the digit movement. And also the folds (seeing eyes as active as the spotted ones), isn't that complicated? So how do we restart the conversation that turned out to be in the most romantic of terms at first sight. You replied to Simone, here is the declaration of your participation. The following is the public documentation 195kb : https://d.jbooks.com/record2/mobi_Docs.PDF", "rollout/best_game/whole_game": "ROLE 0: Atlanta (CNN) -- Architect Ryan Gravel has lived in Atlanta, Georgia for more than 20 years, watching the city grow as it strives to compete among the world's finest. \"Great cities are great places to live and they're really vibrant and diverse,\" Gravel said. \"Atlanta has a lot of those qualities, but it really needs to be sort of kick-started and prepare for the future.\" With a growth in suburban sprawl in recent decades, Atlanta has acquired a reputation for unbearable traffic and poor public transportation. But a project originally inspired by Gravel's 1999 graduate thesis is helping to change that perception, tapping into Atlanta's potential and steering it onto a more sustainable path for the 21st century. The Atlanta BeltLine is revitalizing a 35-kilometer (22-mile) loop of abandoned railway line, transforming the land and adjacent brownfield sites into attractive new public spaces. With walking trails, cycle paths and parks dotted along the route, the project is helping connect 45 city neighborhoods. When it is completed, the BeltLine will have regenerated around 3,000 acres of land. 
See Also: LA's green city challenge . Construction began in 2007, with the first trail (the West End) opening the following year. A Northside trail has since opened, with another, the Eastside, due to open this summer. Along the route, residents can stop off in one of four parks, including D.H. Stanton Park (Atlanta's first energy-neutral park) and the Historic Fourth Ward Park, which has transformed 17 acres of wasteland into a \"glistening oasis\" according to developers. And thanks, in part, to a $25,000 donation from famed skateboarder Tony Hawk, the BeltLine has also created Atlanta's first public skatepark. Atlanta was losing population in the 1970s and 1980s Gravel says, but now the urban core is growing faster than most of the suburban counties. \"You've got thousands of people moving back into the city and that creates a lot of opportunities to leverage that growth to create the kind of place that we all want to live,\" he said. \"If we want to live compactly, sustainably and transit oriented, the BeltLine and that growth presents the opportunity to create that kind of place,\" Gravel added. \"Green space is kind of the living room of the city and as more people move into the city it becomes more and more important to have spaces where people can go get out of their homes and apartments and enjoy the diversity and life of the city.\" Major work still needs to be completed and local voters will decide in July whether to raise taxes to pay for the work and a lengthy list of other transportation projects. All the time and money being plowed into updating Atlanta's infrastructure proves that the city is \"growing up,\" Gravel says. \"We're figuring out who we are and what we want to be. And there's a lot of opportunities. The Olympics were a big one, part of that. 
The BeltLine is the next big major move that starts to redefine us and create who we are going to be when we grow up.\"\nTL;DR: \nROLE 1: Given the text: irl@firstvisualproperty FLDT development by Joshua Krese Images: irl@firstvisualproperty.com & Stefan Boshiol Instagram: stevenjkamp Image Gallery: skecopy\nReconstruct the summarized text to the detailed:\nROLE 2: the first eye emerging from the goat. Notice the digit movement. And also the folds (seeing eyes as active as the spotted ones), isn't that complicated? So how do we restart the conversation that turned out to be in the most romantic of terms at first sight. You replied to Simone, here is the declaration of your participation. The following is the public documentation 195kb : https://d.jbooks.com/record2/mobi_Docs.PDF", "rollout/best_game/overall_score": -135.63107890613242, "rollout/best_game/accuracy (r2)": 0.07200073827980803, "_this_batch_num_rl_token": 22942, "num_rl_rollout": 1, "lm_epoch": 0, "rl_epoch": 0, "step": 0, "total_data_token": 22942, "total_rl_token": 0, "total_lm_token": 0, "total_token": 0, "completed_steps": 0, "rollout/num_train_sample": 160, "timestamp": 1772075080, "checkpoint_dir_name": null, "done": false, "training_iteration": 2, "trial_id": "284c96d2", "date": "2026-02-26_11-04-40", "time_this_iter_s": 25.891287565231323, "time_total_s": 74.70551085472107, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", 
"tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": 
false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 74.70551085472107, "iterations_since_restore": 2}
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 759.96875, "rollout/std_q1_length": 156.7252655029297, "rollout/gen/avg_score": -1121.604736328125, "rollout/gen/std_score": 2201.693603515625, "rollout/gen/avg_r1_length": 125.09375, "rollout/gen/std_r1_length": 27.211666107177734, "rollout/gen/avg_r1_score": -216.55218505859375, "rollout/gen/std_r1_score": 60.28881072998047, "rollout/gen/avg_r1_accuracy": 0.1353650689125061, "rollout/gen/std_r1_accuracy": 0.022726470604538918, "rollout/gen/avg_r2_length": 560.8046875, "rollout/gen/std_r2_length": 323.16473388671875, "rollout/gen/avg_r2_score": -1347.8677978515625, "rollout/gen/std_r2_score": 2410.390625, "rollout/gen/avg_r2_accuracy": 0.09866699576377869, "rollout/gen/std_r2_accuracy": 0.045514825731515884, "rollout/best_game/query_1": "A South\u00a0Carolina\u00a0Sate Senator is under fire after saying that it's okay to make fun of women because they are 'a lesser cut of meat' than men. Republican State Sen. Thomas Corbin said the statement, reportedly directed at Republican South Carolina State Sen. Katrina Shealy, in a discussion over a pending criminal domestic violence (CDV) bill. Shealy is the only female member of South Carolina's 46-person State Senate. Republican State Sen. Thomas Corbin statement, it's okay to make fun of women because they are from 'a lesser cut of meat' than men, was reportedly directed at Republican South Carolina State Sen. Katrina Shealy in a discussion over a pending criminal domestic violence (CDV) bill . 'Well, you know God created man first. Then he took the rib out of man to make woman. And you know, a rib is a lesser cut of meat,' Corbin said at a dinner at a Brazilian Statehouse with fellow Senate judiciary committee members, according to FITSNews. The sexist comment came after Shealy reportedly asked Corbin where he 'got off' attacking women during the\u00a0discussion\u00a0about the pending bill. 
He insists, however, that he was joking when he made his statement, according to WISTV. The sexist comment came after Shealy (pictured) reportedly asked Corbin where he 'got off' attacking women during the discussion about the pending bill . 'We were all joking and laughing' he told WISTV. 'We cut up together.' He also reportedly told others that he got Shealy 'wearing shoes', referring to the idea of women being barefoot and pregnant. Shealy told Corbin that he had pushed her far enough, according to WISTV. She said that she told Corbin: 'I've worked three times harder than you did to get here... I deserve respect and I'm going to get it.' This isn't the first time Corbin has made sexist remarks to his colleagues. A Senate staffer told FITSNews that he makes comments like that 'all the time to everybody'.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0- Local politician wants its BS drugged out\nNow it's legal for a female candidate in hypothetical prison for violating the Civil, Infraction, Deceptive, or Violation Clauses of the Second Amendment.\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " What happened when Angela Khan stormed Rabasa-Libyan 9/11 Commission in NYC\u00a0", "rollout/best_game/whole_game": "ROLE 0: A South\u00a0Carolina\u00a0Sate Senator is under fire after saying that it's okay to make fun of women because they are 'a lesser cut of meat' than men. Republican State Sen. Thomas Corbin said the statement, reportedly directed at Republican South Carolina State Sen. Katrina Shealy, in a discussion over a pending criminal domestic violence (CDV) bill. Shealy is the only female member of South Carolina's 46-person State Senate. Republican State Sen. Thomas Corbin statement, it's okay to make fun of women because they are from 'a lesser cut of meat' than men, was reportedly directed at Republican South Carolina State Sen. 
Katrina Shealy in a discussion over a pending criminal domestic violence (CDV) bill . 'Well, you know God created man first. Then he took the rib out of man to make woman. And you know, a rib is a lesser cut of meat,' Corbin said at a dinner at a Brazilian Statehouse with fellow Senate judiciary committee members, according to FITSNews. The sexist comment came after Shealy reportedly asked Corbin where he 'got off' attacking women during the\u00a0discussion\u00a0about the pending bill. He insists, however, that he was joking when he made his statement, according to WISTV. The sexist comment came after Shealy (pictured) reportedly asked Corbin where he 'got off' attacking women during the discussion about the pending bill . 'We were all joking and laughing' he told WISTV. 'We cut up together.' He also reportedly told others that he got Shealy 'wearing shoes', referring to the idea of women being barefoot and pregnant. Shealy told Corbin that he had pushed her far enough, according to WISTV. She said that she told Corbin: 'I've worked three times harder than you did to get here... I deserve respect and I'm going to get it.' This isn't the first time Corbin has made sexist remarks to his colleagues. 
A Senate staffer told FITSNews that he makes comments like that 'all the time to everybody'.\nTL;DR: \nROLE 1: Given the text: \u00a0- Local politician wants its BS drugged out\nNow it's legal for a female candidate in hypothetical prison for violating the Civil, Infraction, Deceptive, or Violation Clauses of the Second Amendment.\nReconstruct the summarized text to the detailed:\nROLE 2: What happened when Angela Khan stormed Rabasa-Libyan 9/11 Commission in NYC\u00a0", "rollout/best_game/overall_score": -68.14749447681345, "rollout/best_game/accuracy (r2)": 0.005813953488372093, "_this_batch_num_rl_token": 24319, "num_rl_rollout": 2, "lm_epoch": 0, "rl_epoch": 0, "step": 20, "total_data_token": 47261, "total_rl_token": 85473, "total_lm_token": 0, "total_token": 85473, "completed_steps": 20, "rollout/num_train_sample": 160, "timestamp": 1772075118, "checkpoint_dir_name": null, "done": false, "training_iteration": 3, "trial_id": "284c96d2", "date": "2026-02-26_11-05-18", "time_this_iter_s": 37.97566270828247, "time_total_s": 112.68117356300354, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, 
"gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", 
"train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 112.68117356300354, "iterations_since_restore": 3}
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 663.875, "rollout/std_q1_length": 234.5196533203125, "rollout/gen/avg_score": -948.810546875, "rollout/gen/std_score": 1667.57763671875, "rollout/gen/avg_r1_length": 129.09375, "rollout/gen/std_r1_length": 18.940244674682617, "rollout/gen/avg_r1_score": -220.39260864257812, "rollout/gen/std_r1_score": 50.57535934448242, "rollout/gen/avg_r1_accuracy": 0.1458701491355896, "rollout/gen/std_r1_accuracy": 0.020854931324720383, "rollout/gen/avg_r2_length": 643.0625, "rollout/gen/std_r2_length": 293.1659851074219, "rollout/gen/avg_r2_score": -1130.9150390625, "rollout/gen/std_r2_score": 1820.37158203125, "rollout/gen/avg_r2_accuracy": 0.11099065095186234, "rollout/gen/std_r2_accuracy": 0.041133467108011246, "rollout/best_game/query_1": "(CNN) -- Money, husbands, record deals or a free stay at a celebrity sober living house. Just when you thought there couldn't be a more shocking trade-off for national exposure on a reality show, E! brings you \"Bridalplasty.\" The new series, set to make its debut November 28 at 9 p.m., will feature 12 brides-to-be competing for a different plastic surgery each week as they prepare to walk down the aisle. Despite \"Bridalplasty's\" unsettling tagline -- \"the only show where the winner gets cut\" -- the reality competition follows a surprisingly standard format: Each week, the contestants will participate in a wedding- or relationship-themed challenge, such as picking the perfect dress, according to Jason Sarlanis, vice president of original programming and series development for E! So what's the grand prize? The last bride standing will receive her dream wedding, paid for by the show, and the remaining procedures on a \"wish list,\" which she drafts at the beginning of the season with the help of Dr. Terry Dubrow, who appeared on Fox's \"The Swan\" in 2004. The procedures will range from veneers and Botox to breast augmentations and tummy tucks. 
And like many reality competition shows, a winner is granted immunity each week and thus is exempt from competing the following episode. But, on \"Bridalplasty,\" immunity is also a form of \"medical leave.\" \"[Contestants] will wait until they're ready to return -- doctor's orders,\" Sarlanis said. \"They won't participate in any part of the episode that could mess with recuperation.\" But recovery time is not the only concern for Dr. John Diaz, a board-certified plastic surgeon in Beverly Hills, California, who is not affiliated with the show. Diaz says, \"By competing in a show like this, patients might feel pressured to undergo surgeries they wouldn't have under normal circumstances.\" That pressure, he says, can stem from contestants' peers on the show, as well as the fact that the surgeries are free. \"If a contestant is only interested in her nose, but every other woman around her is talking about her breasts, all of a sudden, she may feel pressure to do her breasts, as well,\" Diaz said. \"It's almost like a peer pressure spurred on by this competition.\" Diaz also said that major events, like weddings or reunions, sometimes cause people to evaluate their appearance. \"Who doesn't want to look their best on their wedding day?\" Sarlanis said. \"Plastic surgery is a taboo topic, and a lot of people support it, and a lot of people consider it a guilty pleasure, and a lot of people are against it.\" He added: \"A show like this could go different ways. At the end of the day, we're making a really great show.\" Supporters and critics alike agree that \"Bridalplasty\" will most likely be a hit. \"This is the natural, absurd next step for reality programming. ... It has all the elements of what would be successful for a faceless demographic: bridal themes, aspiration themes, plastic surgery,\" said Colby Hall, the managing editor of Mediaite.com, a website that assesses print, online and broadcast media. 
\"It's going to be a huge hit in the way 'Jersey Shore' was a huge hit,\" he added. \"It's a train wreck to watch, but people will love to sort of feel better than the participants.\" Sarah Polonsky, senior editor at BettyConfidential.com, said she's worried the show will send a negative message to viewers -- \"equating cash to plastic surgery.\" \"Anyone emotionally unstable enough to desire that much plastic surgery really needs Botox for the soul,\" Polonsky said. \"They should be giving out therapy instead of nose jobs. ... When you start planning a wedding to include lip [injections] and rhinoplasty in addition to flower arrangements, it's almost as belittling to your groom as it is to yourself.\" Of course, \"Bridalplasty\" is not the first TV show about plastic", "rollout/best_game/query_2": "Given the text: surgery. In 2005 Robert Downey Jr. created the reality TV show \"Big Brother: Channel 10\" for CBS.\n\nRead more trending stories\n\nHe didn't design a trick to make anyone look more fine\n\nSenate fights to pass laws defining obesity more fluidly\n\nAre you braver? We caught up with some of the bright futures that made life on television possible Because you can't stay thin, science shows us change\n\nExperts call on tech companies to reconsider conversion therapy and nutrition recommendations\n\nWhat's the best anti-aging medicine?\n\nCatch up with last week's six trends.\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " p4-349\n\nStatement of opinion by Jerry Maranet", "rollout/best_game/whole_game": "ROLE 0: (CNN) -- Money, husbands, record deals or a free stay at a celebrity sober living house. Just when you thought there couldn't be a more shocking trade-off for national exposure on a reality show, E! brings you \"Bridalplasty.\" The new series, set to make its debut November 28 at 9 p.m., will feature 12 brides-to-be competing for a different plastic surgery each week as they prepare to walk down the aisle. 
Despite \"Bridalplasty's\" unsettling tagline -- \"the only show where the winner gets cut\" -- the reality competition follows a surprisingly standard format: Each week, the contestants will participate in a wedding- or relationship-themed challenge, such as picking the perfect dress, according to Jason Sarlanis, vice president of original programming and series development for E! So what's the grand prize? The last bride standing will receive her dream wedding, paid for by the show, and the remaining procedures on a \"wish list,\" which she drafts at the beginning of the season with the help of Dr. Terry Dubrow, who appeared on Fox's \"The Swan\" in 2004. The procedures will range from veneers and Botox to breast augmentations and tummy tucks. And like many reality competition shows, a winner is granted immunity each week and thus is exempt from competing the following episode. But, on \"Bridalplasty,\" immunity is also a form of \"medical leave.\" \"[Contestants] will wait until they're ready to return -- doctor's orders,\" Sarlanis said. \"They won't participate in any part of the episode that could mess with recuperation.\" But recovery time is not the only concern for Dr. John Diaz, a board-certified plastic surgeon in Beverly Hills, California, who is not affiliated with the show. Diaz says, \"By competing in a show like this, patients might feel pressured to undergo surgeries they wouldn't have under normal circumstances.\" That pressure, he says, can stem from contestants' peers on the show, as well as the fact that the surgeries are free. \"If a contestant is only interested in her nose, but every other woman around her is talking about her breasts, all of a sudden, she may feel pressure to do her breasts, as well,\" Diaz said. \"It's almost like a peer pressure spurred on by this competition.\" Diaz also said that major events, like weddings or reunions, sometimes cause people to evaluate their appearance. 
\"Who doesn't want to look their best on their wedding day?\" Sarlanis said. \"Plastic surgery is a taboo topic, and a lot of people support it, and a lot of people consider it a guilty pleasure, and a lot of people are against it.\" He added: \"A show like this could go different ways. At the end of the day, we're making a really great show.\" Supporters and critics alike agree that \"Bridalplasty\" will most likely be a hit. \"This is the natural, absurd next step for reality programming. ... It has all the elements of what would be successful for a faceless demographic: bridal themes, aspiration themes, plastic surgery,\" said Colby Hall, the managing editor of Mediaite.com, a website that assesses print, online and broadcast media. \"It's going to be a huge hit in the way 'Jersey Shore' was a huge hit,\" he added. \"It's a train wreck to watch, but people will love to sort of feel better than the participants.\" Sarah Polonsky, senior editor at BettyConfidential.com, said she's worried the show will send a negative message to viewers -- \"equating cash to plastic surgery.\" \"Anyone emotionally unstable enough to desire that much plastic surgery really needs Botox for the soul,\" Polonsky said. \"They should be giving out therapy instead of nose jobs. ... When you start planning a wedding to include lip [injections] and rhinoplasty in addition to flower arrangements, it's almost as belittling to your groom as it is to yourself.\" Of course, \"Bridalplasty\" is not the first TV show about plastic\nROLE 1: Given the text: surgery. In 2005 Robert Downey Jr. created the reality TV show \"Big Brother: Channel 10\" for CBS.\n\nRead more trending stories\n\nHe didn't design a trick to make anyone look more fine\n\nSenate fights to pass laws defining obesity more fluidly\n\nAre you braver? 
We caught up with some of the bright futures that made life on television possible Because you can't stay thin, science shows us change\n\nExperts call on tech companies to reconsider conversion therapy and nutrition recommendations\n\nWhat's the best anti-aging medicine?\n\nCatch up with last week's six trends.\nReconstruct the summarized text to the detailed:\nROLE 2: p4-349\n\nStatement of opinion by Jerry Maranet", "rollout/best_game/overall_score": -147.70253169275455, "rollout/best_game/accuracy (r2)": 0.003337505214851899, "_this_batch_num_rl_token": 21244, "num_rl_rollout": 3, "lm_epoch": 0, "rl_epoch": 0, "step": 40, "total_data_token": 68505, "total_rl_token": 168638, "total_lm_token": 0, "total_token": 168638, "completed_steps": 40, "rollout/num_train_sample": 160, "timestamp": 1772075156, "checkpoint_dir_name": null, "done": false, "training_iteration": 4, "trial_id": "284c96d2", "date": "2026-02-26_11-05-56", "time_this_iter_s": 38.329296827316284, "time_total_s": 151.01047039031982, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, 
"lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, 
"world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 151.01047039031982, "iterations_since_restore": 4} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 679.0625, "rollout/std_q1_length": 188.9305877685547, "rollout/gen/avg_score": -937.8048706054688, "rollout/gen/std_score": 1670.9984130859375, "rollout/gen/avg_r1_length": 119.375, "rollout/gen/std_r1_length": 31.15802574157715, "rollout/gen/avg_r1_score": -198.29525756835938, "rollout/gen/std_r1_score": 73.01620483398438, "rollout/gen/avg_r1_accuracy": 0.13701188564300537, "rollout/gen/std_r1_accuracy": 0.018793070688843727, "rollout/gen/avg_r2_length": 636.375, "rollout/gen/std_r2_length": 297.41070556640625, "rollout/gen/avg_r2_score": -1122.682373046875, "rollout/gen/std_r2_score": 1822.7027587890625, "rollout/gen/avg_r2_accuracy": 0.10813568532466888, "rollout/gen/std_r2_accuracy": 0.03942136839032173, "rollout/best_game/query_1": "By . Lucy Buckland . PUBLISHED: . 07:27 EST, 18 June 2012 . | . UPDATED: . 10:02 EST, 18 June 2012 . Coronation Street actor and performer Brian Hibbard died today after a long battle with prostate cancer. Hibbard, 65, first became famous for forming band The Flying Pickets who topped the charts in 1983 with the surprise hit Only You, before moving into acting. He starred in the cult film Twin Town and appeared in Emmerdale, Coronation Street and EastEnders. Tributes: Actor and performer Brian Hibbard died today (pictured here in Newport in 2006) On the cobbles: Brian Hibbard as Doug in Coronation Street . But he was best known for his role as mechanic Doug Murray in Coronation Street in1992 where he was one of Deidre Barlow\u2019s many lovers. He then went on to star in Emmerdale as Bobby-John Downes and Johnny Mac in the long-running Welsh series Pobol y Cwm. Hibbard died at his home in Roath, Cardiff with his actress wife Caroline, 52, and three children Lilly, Hafwen and Cai at his bedside. Hit: Brian Hibbard with the Flying Pickets who had a number one hit in 1983 with Only You . 
Hibbard had first been diagnosed with prostate cancer 12 years ago but only revealed it publicly in 2008. His health is thought to have taken a turn for the worst eight weeks ago and he was admitted to hospital. Hibbard, born in Ebbw Vale, South Wales, . worked as a teacher, steel worker, barman and chimney sweep, before . forming the acapella group the Flying Pickets. They had a huge hit with Only You, with it spending five weeks at the top of the chart and won the coveted Christmas number one slot in 1983. Tributes: Colleagues and friends took to Twitter to pay their respects . On the dales: Brain played Bobby-John in Emmerdale in 2006 . His agent confirmed his death yesterday and tributes were paid to a 'great man and lover of life' by his friends. This afternoon friends remembered Hibbard, making his name trend over Twitter, many paying tribute to his great sense of humour and enormous talent. Fellow actor Boyd Clack, who starred in Twin Town with Hibbard, told BBC Wales he first met Hibbard 28 years ago in a Theatr Clwyd production. He said: 'Within two days of meeting him, he'd shown me his naked bum. He was a bit of a character all right. 'He was also fantastic actor, fantastic singer and a fantastic man. He was an old-fashioned socialist [who] was caring and kind. He did a huge amount of work for charity over the years. 'He was an honest person. He supported the miners during the strike; he supported every cause.' In the Vic: Brain Hibbard played social worker Henry Mason in EastEnders last year . TV star: Brian also starred in comic drama Tales From Pleasure Beach in 2001 . Stalwart Hibbard also starred in the Welsh drama Pam Fi, Duw and Twin Town starring Rhys Ifans in 1997. Hibbard went on to appear in films including Twin Town and television shows with roles in Casualty, Coronation Street and Making Out. 
One of his last roles was in EastEnders, playing a former social worker called Henry Mason.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Brian Hibbard died today, May 26th (save for a chilling visual version posted on his Instagram by illustrator Benedict Rosen.) While daydreaming in Scotch Soldier appreciation for the stuff he's written and shared on these 1,427 snaps, he sat in grim death -- sleepily.\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": "\n1) Giles longie with Mickey and Ari other daydreamers .\n2) Brian Hibbard with Vincent Simme (top and bottom) (top gloating roads)\n3) Birds fondling cloud tundra beside creek effluent water.\n1) Giles big mosked because the French river are carrying both.\n2) Brian Hibbard hewn half may, but just before giving them each another headlong shove the bassoon arched up and tadpole stood off-center between the mogh leafing green algae track\nJustin Jacobson's Last Lunch: Lessons Learned from Merriam and Page (Green piece) by his wife Chirlan Salzburger Eric Larson (author of Mistry Leafed sleeve before Title of the Book: Artistic and Political Ideas of Fabian Society By Robert Thomas, published with permission by Jonathan Eric Larson).", "rollout/best_game/whole_game": "ROLE 0: By . Lucy Buckland . PUBLISHED: . 07:27 EST, 18 June 2012 . | . UPDATED: . 10:02 EST, 18 June 2012 . Coronation Street actor and performer Brian Hibbard died today after a long battle with prostate cancer. Hibbard, 65, first became famous for forming band The Flying Pickets who topped the charts in 1983 with the surprise hit Only You, before moving into acting. He starred in the cult film Twin Town and appeared in Emmerdale, Coronation Street and EastEnders. Tributes: Actor and performer Brian Hibbard died today (pictured here in Newport in 2006) On the cobbles: Brian Hibbard as Doug in Coronation Street . 
But he was best known for his role as mechanic Doug Murray in Coronation Street in1992 where he was one of Deidre Barlow\u2019s many lovers. He then went on to star in Emmerdale as Bobby-John Downes and Johnny Mac in the long-running Welsh series Pobol y Cwm. Hibbard died at his home in Roath, Cardiff with his actress wife Caroline, 52, and three children Lilly, Hafwen and Cai at his bedside. Hit: Brian Hibbard with the Flying Pickets who had a number one hit in 1983 with Only You . Hibbard had first been diagnosed with prostate cancer 12 years ago but only revealed it publicly in 2008. His health is thought to have taken a turn for the worst eight weeks ago and he was admitted to hospital. Hibbard, born in Ebbw Vale, South Wales, . worked as a teacher, steel worker, barman and chimney sweep, before . forming the acapella group the Flying Pickets. They had a huge hit with Only You, with it spending five weeks at the top of the chart and won the coveted Christmas number one slot in 1983. Tributes: Colleagues and friends took to Twitter to pay their respects . On the dales: Brain played Bobby-John in Emmerdale in 2006 . His agent confirmed his death yesterday and tributes were paid to a 'great man and lover of life' by his friends. This afternoon friends remembered Hibbard, making his name trend over Twitter, many paying tribute to his great sense of humour and enormous talent. Fellow actor Boyd Clack, who starred in Twin Town with Hibbard, told BBC Wales he first met Hibbard 28 years ago in a Theatr Clwyd production. He said: 'Within two days of meeting him, he'd shown me his naked bum. He was a bit of a character all right. 'He was also fantastic actor, fantastic singer and a fantastic man. He was an old-fashioned socialist [who] was caring and kind. He did a huge amount of work for charity over the years. 'He was an honest person. He supported the miners during the strike; he supported every cause.' 
In the Vic: Brain Hibbard played social worker Henry Mason in EastEnders last year . TV star: Brian also starred in comic drama Tales From Pleasure Beach in 2001 . Stalwart Hibbard also starred in the Welsh drama Pam Fi, Duw and Twin Town starring Rhys Ifans in 1997. Hibbard went on to appear in films including Twin Town and television shows with roles in Casualty, Coronation Street and Making Out. One of his last roles was in EastEnders, playing a former social worker called Henry Mason.\nTL;DR: \nROLE 1: Given the text: \u00a0Brian Hibbard died today, May 26th (save for a chilling visual version posted on his Instagram by illustrator Benedict Rosen.) While daydreaming in Scotch Soldier appreciation for the stuff he's written and shared on these 1,427 snaps, he sat in grim death -- sleepily.\nReconstruct the summarized text to the detailed:\nROLE 2: \n1) Giles longie with Mickey and Ari other daydreamers .\n2) Brian Hibbard with Vincent Simme (top and bottom) (top gloating roads)\n3) Birds fondling cloud tundra beside creek effluent water.\n1) Giles big mosked because the French river are carrying both.\n2) Brian Hibbard hewn half may, but just before giving them each another headlong shove the bassoon arched up and tadpole stood off-center between the mogh leafing green algae track\nJustin Jacobson's Last Lunch: Lessons Learned from Merriam and Page (Green piece) by his wife Chirlan Salzburger Eric Larson (author of Mistry Leafed sleeve before Title of the Book: Artistic and Political Ideas of Fabian Society By Robert Thomas, published with permission by Jonathan Eric Larson).", "rollout/best_game/overall_score": -241.8123033752852, "rollout/best_game/accuracy (r2)": 0.0600664772882254, "_this_batch_num_rl_token": 21730, "num_rl_rollout": 4, "lm_epoch": 0, "rl_epoch": 0, "step": 60, "total_data_token": 90235, "total_rl_token": 258705, "total_lm_token": 0, "total_token": 258705, "completed_steps": 60, "rollout/num_train_sample": 160, "timestamp": 1772075196, 
"checkpoint_dir_name": null, "done": false, "training_iteration": 5, "trial_id": "284c96d2", "date": "2026-02-26_11-06-36", "time_this_iter_s": 39.844894886016846, "time_total_s": 190.85536527633667, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 
5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 190.85536527633667, "iterations_since_restore": 5} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 718.90625, "rollout/std_q1_length": 184.86166381835938, "rollout/gen/avg_score": -946.8731689453125, "rollout/gen/std_score": 1668.0435791015625, "rollout/gen/avg_r1_length": 119.46875, "rollout/gen/std_r1_length": 32.05839920043945, "rollout/gen/avg_r1_score": -198.31063842773438, "rollout/gen/std_r1_score": 73.22761535644531, "rollout/gen/avg_r1_accuracy": 0.14221912622451782, "rollout/gen/std_r1_accuracy": 0.017027638852596283, "rollout/gen/avg_r2_length": 651.4375, "rollout/gen/std_r2_length": 280.8484802246094, "rollout/gen/avg_r2_score": -1134.013671875, "rollout/gen/std_r2_score": 1818.1427001953125, "rollout/gen/avg_r2_accuracy": 0.11426368355751038, "rollout/gen/std_r2_accuracy": 0.03658073768019676, "rollout/best_game/query_1": "By . Ian Sparks . PUBLISHED: . 10:24 EST, 16 November 2012 . | . UPDATED: . 06:25 EST, 17 November 2012 . A British monk has died after dousing himself in petrol and setting himself on fire at a Buddhist monastery in southern France. Venerable Lobsang Tonden, 38, who was born David Alain, may have been making a 'political gesture' against the Chinese occupation of Tibet, police said. Horrified fellow monks discovered the man ablaze at the Nalanda monastery in the village of Labastide-Saint-Georges at 5pm on Thursday. Self-immolation: Venerable Lobsang Tonden, who was born David Alain, died after setting himself on fire in France . Firefighters and police were called to the scene but were unable to save the monk's life. Gendarmerie . colonel Pierre Bouquin said: 'The man appears to have poured petrol . over his head and clothes and set himself on fire. 'He was discovered a few minutes later by other monks in a garden at the monastery. 'We . believe he may have been making a political gesture related to Tibet . but an investigation will be carried out to establish the full facts.' 
Protest: Images on the Nalanda monastery website show monks supporting Tibetan monks in their struggle against China - this self-immolation is suspected to be in support of Tibet . Discovery: Monks from the monastery stumbled across the Brit ablaze in the garden . Head monk the Venerable Lobsang Tendar said the victim had been at the monastery for five years but had no comment to make on the tragedy. The Nalanda monastery was set up in 1981 and has around 50 monks from 20 countries. Self-immolation by Buddhist monks occurs has occurred around 60 times since March 2011 in Tibet, as gestures of defiance against the Chinese occupation of their country and oppression of Tibetan culture and religion. Tension has risen in the past year but . Beijing insists that Tibetans have benefited from improved living . standards brought on by China and say the monks enjoy religious freedom. There has been a spate of . self-immolations in recent days with Chinese military forces mobilised . in occupied Tibet on November 8 after a trio of teenage monks set fire . to themselves. Scenic: The Nalanda monastery is located in southern France, in the village of Labastide-Saint-Georges . The boys called out for freedom . in Tibet and for the return of the Dalai Lama as they set themselves on . fire. Security forces arrived at the scene almost immediately. The youngest monk, 15-year-old Dorjee, died at the scene. His companions, 16-year-olds Samdup and Dorjee Kyab, were taken to hospital by security forces. In late October Dorjee Rinchen, 58, set himself alight outside a Chinese government building. He burned to death in Labrang, . occupied Tibet, and security forces clashed with mourners when they . attempted to bring his body back to the Gansu province, northwestern . China. Protest: Dorjee Rinchen is seen burning on the street outside the Chinese Armed Forces department in Labrang, Chinese Tibet . 
When Tibetan monks from the Labrang Monastery, the scene of Monday\u2019s self immolation, later attempted to visit Mr Rinchen\u2019s home to pray, they were stopped by security and forced to pray by the roadside. Free Tibet Director Stephanie Brigden said: 'Dorjee Rinchen is the second Tibetan to set himself on fire in Labrang in two days. 'Like many Tibetans in the last eighteen months, he set himself on fire in front of a government building that symbolises China\u2019s occupation. 'Tibetans are paying a terrible price for the Communist Party leadership\u2019s determination to maintain stability.'\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Tibet was the most militarily poor and least democratic country known, outside of Europe. \u00a0 However. The policies of the Chinese authorities there are horriblyly inconsistent. Its bitterly divided citizens collectively have yet to learn from the past.\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \u00a0 \u00a0In particular, it decreed that Tibet and Xinjiang should become extensions of Chinese rule in Tibet and Xinjiang at no cost to the area. \u00a0 Needless to say that the visionary Tibetans must be rounded up and often forcefully expelled from Xinjiang due to direct efforts of Tung White .\nDownload audio and printable version here: James Moriarty's Oakland TED Talk Podcast", "rollout/best_game/whole_game": "ROLE 0: By . Ian Sparks . PUBLISHED: . 10:24 EST, 16 November 2012 . | . UPDATED: . 06:25 EST, 17 November 2012 . A British monk has died after dousing himself in petrol and setting himself on fire at a Buddhist monastery in southern France. Venerable Lobsang Tonden, 38, who was born David Alain, may have been making a 'political gesture' against the Chinese occupation of Tibet, police said. Horrified fellow monks discovered the man ablaze at the Nalanda monastery in the village of Labastide-Saint-Georges at 5pm on Thursday. 
Self-immolation: Venerable Lobsang Tonden, who was born David Alain, died after setting himself on fire in France . Firefighters and police were called to the scene but were unable to save the monk's life. Gendarmerie . colonel Pierre Bouquin said: 'The man appears to have poured petrol . over his head and clothes and set himself on fire. 'He was discovered a few minutes later by other monks in a garden at the monastery. 'We . believe he may have been making a political gesture related to Tibet . but an investigation will be carried out to establish the full facts.' Protest: Images on the Nalanda monastery website show monks supporting Tibetan monks in their struggle against China - this self-immolation is suspected to be in support of Tibet . Discovery: Monks from the monastery stumbled across the Brit ablaze in the garden . Head monk the Venerable Lobsang Tendar said the victim had been at the monastery for five years but had no comment to make on the tragedy. The Nalanda monastery was set up in 1981 and has around 50 monks from 20 countries. Self-immolation by Buddhist monks occurs has occurred around 60 times since March 2011 in Tibet, as gestures of defiance against the Chinese occupation of their country and oppression of Tibetan culture and religion. Tension has risen in the past year but . Beijing insists that Tibetans have benefited from improved living . standards brought on by China and say the monks enjoy religious freedom. There has been a spate of . self-immolations in recent days with Chinese military forces mobilised . in occupied Tibet on November 8 after a trio of teenage monks set fire . to themselves. Scenic: The Nalanda monastery is located in southern France, in the village of Labastide-Saint-Georges . The boys called out for freedom . in Tibet and for the return of the Dalai Lama as they set themselves on . fire. Security forces arrived at the scene almost immediately. The youngest monk, 15-year-old Dorjee, died at the scene. 
His companions, 16-year-olds Samdup and Dorjee Kyab, were taken to hospital by security forces. In late October Dorjee Rinchen, 58, set himself alight outside a Chinese government building. He burned to death in Labrang, . occupied Tibet, and security forces clashed with mourners when they . attempted to bring his body back to the Gansu province, northwestern . China. Protest: Dorjee Rinchen is seen burning on the street outside the Chinese Armed Forces department in Labrang, Chinese Tibet . When Tibetan monks from the Labrang Monastery, the scene of Monday\u2019s self immolation, later attempted to visit Mr Rinchen\u2019s home to pray, they were stopped by security and forced to pray by the roadside. Free Tibet Director Stephanie Brigden said: 'Dorjee Rinchen is the second Tibetan to set himself on fire in Labrang in two days. 'Like many Tibetans in the last eighteen months, he set himself on fire in front of a government building that symbolises China\u2019s occupation. 'Tibetans are paying a terrible price for the Communist Party leadership\u2019s determination to maintain stability.'\nTL;DR: \nROLE 1: Given the text: \u00a0Tibet was the most militarily poor and least democratic country known, outside of Europe. \u00a0 However. The policies of the Chinese authorities there are horriblyly inconsistent. Its bitterly divided citizens collectively have yet to learn from the past.\nReconstruct the summarized text to the detailed:\nROLE 2: \u00a0 \u00a0In particular, it decreed that Tibet and Xinjiang should become extensions of Chinese rule in Tibet and Xinjiang at no cost to the area. 
\u00a0 Needless to say that the visionary Tibetans must be rounded up and often forcefully expelled from Xinjiang due to direct efforts of Tung White .\nDownload audio and printable version here: James Moriarty's Oakland TED Talk Podcast", "rollout/best_game/overall_score": -130.99650178589008, "rollout/best_game/accuracy (r2)": 0.04996153918312977, "_this_batch_num_rl_token": 23005, "num_rl_rollout": 5, "lm_epoch": 0, "rl_epoch": 0, "step": 80, "total_data_token": 113240, "total_rl_token": 344368, "total_lm_token": 0, "total_token": 344368, "completed_steps": 80, "rollout/num_train_sample": 160, "timestamp": 1772075237, "checkpoint_dir_name": null, "done": false, "training_iteration": 6, "trial_id": "284c96d2", "date": "2026-02-26_11-07-17", "time_this_iter_s": 41.31021785736084, "time_total_s": 232.1655831336975, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": 
"kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 232.1655831336975, "iterations_since_restore": 6} | |
| {"rl_info/A2G": -3.655177354812622, "rl_info/entropy": 3.056278705596924, "rl_info/total_token": 3755.0, "rl_info/advantage_b4_norm": -1547.07470703125, "rl_info/kl_w_ref": 0.0, "train/rl_loss": 365.5174255371094, "train/total_loss": 365.5174255371094, "cnndm/rouge1": 0.20295332203327696, "cnndm/rouge2": 0.06270234512909481, "cnndm/rougeL": 0.14877834636947918, "cnndm/rougeLsum": 0.16471819101882157, "cnndm/bertscore_precision": 0.7096506178379058, "cnndm/bertscore_recall": 0.7654967844486237, "cnndm/bertscore_f1": 0.7362120985984802, "eval_agg/avg_all_rougef": 0.14478805113766813, "eval_agg/avg_all_bertf": 0.7362120985984802, "eval_agg/avg_all": 0.4405000748680742, "num_rl_rollout": 5, "lm_epoch": 0, "rl_epoch": 0, "step": 100, "total_data_token": 113240, "total_rl_token": 424093, "total_lm_token": 0, "total_token": 424093, "completed_steps": 100, "tune_objective": 0.5124110369410866, "timestamp": 1772075251, "checkpoint_dir_name": "checkpoint_000000", "should_checkpoint": true, "done": false, "training_iteration": 7, "trial_id": "284c96d2", "date": "2026-02-26_11-07-32", "time_this_iter_s": 14.673433303833008, "time_total_s": 246.83901643753052, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 
3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", 
"script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 246.83901643753052, "iterations_since_restore": 7} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 728.15625, "rollout/std_q1_length": 175.83883666992188, "rollout/gen/avg_score": -994.21484375, "rollout/gen/std_score": 1817.8292236328125, "rollout/gen/avg_r1_length": 123.96875, "rollout/gen/std_r1_length": 25.353607177734375, "rollout/gen/avg_r1_score": -205.95135498046875, "rollout/gen/std_r1_score": 64.73797607421875, "rollout/gen/avg_r1_accuracy": 0.14199760556221008, "rollout/gen/std_r1_accuracy": 0.021809814497828484, "rollout/gen/avg_r2_length": 627.984375, "rollout/gen/std_r2_length": 299.61279296875, "rollout/gen/avg_r2_score": -1191.28076171875, "rollout/gen/std_r2_score": 1985.0460205078125, "rollout/gen/avg_r2_accuracy": 0.10803656280040741, "rollout/gen/std_r2_accuracy": 0.041365865617990494, "rollout/best_game/query_1": "By . Claire Bloomfield . PUBLISHED: . 07:01 EST, 13 March 2012 . | . UPDATED: . 07:01 EST, 13 March 2012 . When his son was diagnosed with a rare medical condition that attacked his kidneys, Duane Harvard, 51, didn't think twice about donating one of his own. But his grateful 10-year-old son Raphael has nicknamed him 'Superdad' as a result. 'I don\u2019t think of that, it was a . privilege, an honour, it\u2019s the least I could have done,' Mr Harvard said. 'But he does appreciate it. When he takes his medication he is doing it for me in a way, he knows he has to look after the kidney.' Recovery: Raphael Harvard and his father Duane at Great Ormond Street Hospital after the kidney transplant . Mr Havard and his wife Anna, 35, from . Bedford, took Raphael to the doctors in February 2011 after he began feeling unwell. He was originally diagnosed with a . stomach bug and later with gastritis. But as his condition continued to worsen . over the next few days, he was rushed to hospital. Doctors consulted . Great Ormond Street Hospital (GOSH) with the then nine year-old\u2019s . alarming test results. His 'creatinine' level, a way of . 
measuring kidney function, was exceedingly high - the average level for a . child is 30-57 micromoles per litre and Raphael\u2019s was 1,044 - showing . his kidneys were not filtering his blood. 'They just put him in an ambulance - we . live 70 miles from London and they drove him all the way there on blue . lights,' Mr Havard said. Surgery: Raphael, 10, suffered total kidney failure after his immune system attacked those organs . Close bond: Duane Harvard (left) said it was a privilege to donate a kidney to his son . At Great Ormond Street Raphael was diagnosed with complete kidney failure, caused by the rare condition Goodpasture\u2019s Syndrome. The disease means the body\u2019s own immune system reacts against some parts of itself, creating antibodies that attack the lungs and kidneys. It has to be treated with immunosuppressive drugs to stop this process. Raphael initially spent six weeks at GOSH where he had three operations, 20 plasma exchanges and two blood transfusions. Even when at home, he had to undergo around 11 hours of dialysis a night, his bedroom converted into a clinic completely with all the equipment and hospital-level hygiene. The family\u2019s pet basset hound Charlie also had to be rehomed amid hygiene concerns. But Raphael\u2019s only hope of recovery rested with a kidney transplant. Both his mother and father volunteered as donors - deciding it should be his father who donated the organ. 'We both put ourselves forward, he\u2019s our only child and it\u2019s just the immediate instinct you have. 'You would do anything for that child, we both wanted to do it desperately. 'We decided I would go first - we couldn\u2019t have the operations in the same hospital and we thought it was better if his mother was with him in the hospital. 'You don\u2019t realise it but once you have been accepted and you\u2019re working your way through the process, you see how important that kidney is. 
'There\u2019s a lot of emotional and psychological pressure on you, your child is depending on you. 'You watch him, the difficulties at home, and you know the key to a better quality of life lies in you. It was such a relief to actually have the operation.' Looking forwards: Both Raphael's mother and father stepped forward to donate him a kidney. In the end his father had the operation . The operation on November 15 was followed by an anxious six-week wait to see if it was a success.And a few months later, the family\u2019s life is starting to get back to normal - Raphael went back to school in January and his father is back at work. For football-mad Raphael, this year saw him get the chance to be the mascot at Manchester United\u2019s match against", "rollout/best_game/query_2": "Given the text: Tottenham.After returning home to Manchester, his Morehouse ongoing medical affairs were a big headache. But some six months after wrapping up Gerard Piqu\u00e9 and joining Paul Pogba in Spain for a top-flight season, he was determined to play football again. 'We were searching after him but it was something we didn\u2019t know had ever happened to the first guy anywhere,' said Mr Havard. 'I thought pleasures were important to him because he was always buying me something on the day he played at Liverpool.'Published: 13 March 2012\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " That played to Sculpt.\nSams in daydream staral revenge as he says you begin to see bosoms of love: www.Peres Definitive", "rollout/best_game/whole_game": "ROLE 0: By . Claire Bloomfield . PUBLISHED: . 07:01 EST, 13 March 2012 . | . UPDATED: . 07:01 EST, 13 March 2012 . When his son was diagnosed with a rare medical condition that attacked his kidneys, Duane Harvard, 51, didn't think twice about donating one of his own. But his grateful 10-year-old son Raphael has nicknamed him 'Superdad' as a result. 'I don\u2019t think of that, it was a . 
privilege, an honour, it\u2019s the least I could have done,' Mr Harvard said. 'But he does appreciate it. When he takes his medication he is doing it for me in a way, he knows he has to look after the kidney.' Recovery: Raphael Harvard and his father Duane at Great Ormond Street Hospital after the kidney transplant . Mr Havard and his wife Anna, 35, from . Bedford, took Raphael to the doctors in February 2011 after he began feeling unwell. He was originally diagnosed with a . stomach bug and later with gastritis. But as his condition continued to worsen . over the next few days, he was rushed to hospital. Doctors consulted . Great Ormond Street Hospital (GOSH) with the then nine year-old\u2019s . alarming test results. His 'creatinine' level, a way of . measuring kidney function, was exceedingly high - the average level for a . child is 30-57 micromoles per litre and Raphael\u2019s was 1,044 - showing . his kidneys were not filtering his blood. 'They just put him in an ambulance - we . live 70 miles from London and they drove him all the way there on blue . lights,' Mr Havard said. Surgery: Raphael, 10, suffered total kidney failure after his immune system attacked those organs . Close bond: Duane Harvard (left) said it was a privilege to donate a kidney to his son . At Great Ormond Street Raphael was diagnosed with complete kidney failure, caused by the rare condition Goodpasture\u2019s Syndrome. The disease means the body\u2019s own immune system reacts against some parts of itself, creating antibodies that attack the lungs and kidneys. It has to be treated with immunosuppressive drugs to stop this process. Raphael initially spent six weeks at GOSH where he had three operations, 20 plasma exchanges and two blood transfusions. Even when at home, he had to undergo around 11 hours of dialysis a night, his bedroom converted into a clinic completely with all the equipment and hospital-level hygiene. 
The family\u2019s pet basset hound Charlie also had to be rehomed amid hygiene concerns. But Raphael\u2019s only hope of recovery rested with a kidney transplant. Both his mother and father volunteered as donors - deciding it should be his father who donated the organ. 'We both put ourselves forward, he\u2019s our only child and it\u2019s just the immediate instinct you have. 'You would do anything for that child, we both wanted to do it desperately. 'We decided I would go first - we couldn\u2019t have the operations in the same hospital and we thought it was better if his mother was with him in the hospital. 'You don\u2019t realise it but once you have been accepted and you\u2019re working your way through the process, you see how important that kidney is. 'There\u2019s a lot of emotional and psychological pressure on you, your child is depending on you. 'You watch him, the difficulties at home, and you know the key to a better quality of life lies in you. It was such a relief to actually have the operation.' Looking forwards: Both Raphael's mother and father stepped forward to donate him a kidney. In the end his father had the operation . The operation on November 15 was followed by an anxious six-week wait to see if it was a success.And a few months later, the family\u2019s life is starting to get back to normal - Raphael went back to school in January and his father is back at work. For football-mad Raphael, this year saw him get the chance to be the mascot at Manchester United\u2019s match against\nROLE 1: Given the text: Tottenham.After returning home to Manchester, his Morehouse ongoing medical affairs were a big headache. But some six months after wrapping up Gerard Piqu\u00e9 and joining Paul Pogba in Spain for a top-flight season, he was determined to play football again. 'We were searching after him but it was something we didn\u2019t know had ever happened to the first guy anywhere,' said Mr Havard. 
'I thought pleasures were important to him because he was always buying me something on the day he played at Liverpool.'Published: 13 March 2012\nReconstruct the summarized text to the detailed:\nROLE 2: That played to Sculpt.\nSams in daydream staral revenge as he says you begin to see bosoms of love: www.Peres Definitive", "rollout/best_game/overall_score": -148.20966199059527, "rollout/best_game/accuracy (r2)": 0.014851387398408142, "_this_batch_num_rl_token": 23301, "num_rl_rollout": 6, "lm_epoch": 0, "rl_epoch": 0, "step": 100, "total_data_token": 136541, "total_rl_token": 424093, "total_lm_token": 0, "total_token": 424093, "completed_steps": 100, "rollout/num_train_sample": 160, "timestamp": 1772075279, "checkpoint_dir_name": null, "done": false, "training_iteration": 8, "trial_id": "284c96d2", "date": "2026-02-26_11-07-59", "time_this_iter_s": 26.95094871520996, "time_total_s": 273.7899651527405, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, 
"mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 273.7899651527405, 
"iterations_since_restore": 8} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 776.125, "rollout/std_q1_length": 148.59356689453125, "rollout/gen/avg_score": -752.787353515625, "rollout/gen/std_score": 1102.0655517578125, "rollout/gen/avg_r1_length": 120.375, "rollout/gen/std_r1_length": 31.762983322143555, "rollout/gen/avg_r1_score": -205.44552612304688, "rollout/gen/std_r1_score": 70.88546752929688, "rollout/gen/avg_r1_accuracy": 0.14400672912597656, "rollout/gen/std_r1_accuracy": 0.018721893429756165, "rollout/gen/avg_r2_length": 638.4609375, "rollout/gen/std_r2_length": 287.7415466308594, "rollout/gen/avg_r2_score": -889.622802734375, "rollout/gen/std_r2_score": 1193.7308349609375, "rollout/gen/avg_r2_accuracy": 0.11459691822528839, "rollout/gen/std_r2_accuracy": 0.040419820696115494, "rollout/best_game/query_1": "By . Mark Prigg . PUBLISHED: . 06:55 EST, 9 April 2013 . | . UPDATED: . 07:34 EST, 9 April 2013 . Researchers have unveiled a DNA database for rhino horn they hope will cut down on poaching. The Scottish Government has joined the fight against the illegal trade in rhinoceros horn by setting up a DNA database which will help trace the origin of stolen horns seized by police. Rhinoceros horn now fetches more than its weight in gold on the black market and it is not just animals living in the wild that are being targeted by criminals. Angus the rhino with mum Dorothy at at Blair Drummond Safari Park: Researchers are now creating a DNA database of rhino so any seized horns can be traced . Rhinoceros horn now fetches more than . its weight in gold on the black market and it is not just animals living . in the wild that are being targeted by profit-seeking criminals. More than 50 thefts were reported from museums, galleries and auction houses in Europe in 2011. More than 50 thefts were reported from museums, galleries and auction houses in Europe in 2011. 
Scientists at Sasa (Science and Advice for Scottish Agriculture) are to send out sampling kits to museums and zoos across the UK to create Scotland's first rhino horn database. A study has shown that DNA profiles can be produced from museum horns that are more than 100 years old. The aim of the Defra-funded project is to protect exhibits and also live rhinos in zoos from criminal gangs who steal and sell rhino horn for thousands of pounds. Environment Minister Paul Wheelhouse said: 'The illegal trade in rhino horn has become not only a threat to these magnificent but sadly very rare animals in the wild, but also to our museums and zoos. 'The work at Sasa will help the police to crack down on the criminals who are beneath contempt and who, seemingly, will stop at nothing to exploit these endangered animals for profit.' Lucy Webster, who is co-ordinating the project at Sasa, said: 'While we hope that the horns in museums and zoos in the UK are safe, where large sums of money are involved there will always be some pressure from criminals who seek to exploit these resources. More than 50 thefts of rhinoceros horns were reported from museums, galleries and auction houses in Europe in 2011 . 'This database will store a unique DNA profile for each sample submitted. If any are subsequently stolen, these profiles will allow horns recovered to be traced back to their place of origin - helping investigators disentangle the supply chain of this illegal trade.' The move was welcomed by WWF Scotland director Lang Banks. He said: 'The illegal trade in rhino horn is now being co-ordinated by sophisticated international criminal syndicates, with big profits to be made by supplying wealthy clients. 'As international efforts are stepped up to protect living rhinos in their natural habitats, we've seen a jump in thefts of horns from private collections in Europe. 
'Taking steps to database known rhino horn in Scotland and the rest of the UK will help to close yet another door on those attempting to profit from the illegal trade in endangered wildlife.'\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Niantic investigation into how rhino horns are sold without dentures will free dogs wearing dentures and collars, which are used for protection by officials. Credit: Newsbeat Loading Tom, horn wreaths behind the microphones: An incredible piece of research stolen from a world renown museum. SPREAD THE LOVE: Well said , would you trust a white pill from a jeweller?! Contact us on: Email: ChicagoBucks@BrightpointCharm_HitlerSpeaks Distrust for Bad Romney: a stark contrast to the Curiosity campaign It clearly\u00a0is not a way of dealing with the 40% of Michael Canuck who now look on part and admire\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": "\nVeterinary reports from 12 test by Novy Higgins' (found in the RSNT file?) Vietnam vet who once ate a rhino with a swastika: \"Made on imagination through uncanny realism and human intelligence\".\u00a0 So this elderly old Chinese man who employed Hollywood elites for conditions could be the goat herself.", "rollout/best_game/whole_game": "ROLE 0: By . Mark Prigg . PUBLISHED: . 06:55 EST, 9 April 2013 . | . UPDATED: . 07:34 EST, 9 April 2013 . Researchers have unveiled a DNA database for rhino horn they hope will cut down on poaching. The Scottish Government has joined the fight against the illegal trade in rhinoceros horn by setting up a DNA database which will help trace the origin of stolen horns seized by police. Rhinoceros horn now fetches more than its weight in gold on the black market and it is not just animals living in the wild that are being targeted by criminals. Angus the rhino with mum Dorothy at at Blair Drummond Safari Park: Researchers are now creating a DNA database of rhino so any seized horns can be traced . 
Rhinoceros horn now fetches more than . its weight in gold on the black market and it is not just animals living . in the wild that are being targeted by profit-seeking criminals. More than 50 thefts were reported from museums, galleries and auction houses in Europe in 2011. More than 50 thefts were reported from museums, galleries and auction houses in Europe in 2011. Scientists at Sasa (Science and Advice for Scottish Agriculture) are to send out sampling kits to museums and zoos across the UK to create Scotland's first rhino horn database. A study has shown that DNA profiles can be produced from museum horns that are more than 100 years old. The aim of the Defra-funded project is to protect exhibits and also live rhinos in zoos from criminal gangs who steal and sell rhino horn for thousands of pounds. Environment Minister Paul Wheelhouse said: 'The illegal trade in rhino horn has become not only a threat to these magnificent but sadly very rare animals in the wild, but also to our museums and zoos. 'The work at Sasa will help the police to crack down on the criminals who are beneath contempt and who, seemingly, will stop at nothing to exploit these endangered animals for profit.' Lucy Webster, who is co-ordinating the project at Sasa, said: 'While we hope that the horns in museums and zoos in the UK are safe, where large sums of money are involved there will always be some pressure from criminals who seek to exploit these resources. More than 50 thefts of rhinoceros horns were reported from museums, galleries and auction houses in Europe in 2011 . 'This database will store a unique DNA profile for each sample submitted. If any are subsequently stolen, these profiles will allow horns recovered to be traced back to their place of origin - helping investigators disentangle the supply chain of this illegal trade.' The move was welcomed by WWF Scotland director Lang Banks. 
He said: 'The illegal trade in rhino horn is now being co-ordinated by sophisticated international criminal syndicates, with big profits to be made by supplying wealthy clients. 'As international efforts are stepped up to protect living rhinos in their natural habitats, we've seen a jump in thefts of horns from private collections in Europe. 'Taking steps to database known rhino horn in Scotland and the rest of the UK will help to close yet another door on those attempting to profit from the illegal trade in endangered wildlife.'\nTL;DR: \nROLE 1: Given the text: \u00a0Niantic investigation into how rhino horns are sold without dentures will free dogs wearing dentures and collars, which are used for protection by officials. Credit: Newsbeat Loading Tom, horn wreaths behind the microphones: An incredible piece of research stolen from a world renown museum. SPREAD THE LOVE: Well said , would you trust a white pill from a jeweller?! Contact us on: Email: ChicagoBucks@BrightpointCharm_HitlerSpeaks Distrust for Bad Romney: a stark contrast to the Curiosity campaign It clearly\u00a0is not a way of dealing with the 40% of Michael Canuck who now look on part and admire\nReconstruct the summarized text to the detailed:\nROLE 2: \nVeterinary reports from 12 test by Novy Higgins' (found in the RSNT file?) 
Vietnam vet who once ate a rhino with a swastika: \"Made on imagination through uncanny realism and human intelligence\".\u00a0 So this elderly old Chinese man who employed Hollywood elites for conditions could be the goat herself.", "rollout/best_game/overall_score": -302.20347926160713, "rollout/best_game/accuracy (r2)": 0.040620628039724334, "_this_batch_num_rl_token": 24836, "num_rl_rollout": 7, "lm_epoch": 0, "rl_epoch": 0, "step": 120, "total_data_token": 161377, "total_rl_token": 520486, "total_lm_token": 0, "total_token": 520486, "completed_steps": 120, "rollout/num_train_sample": 160, "timestamp": 1772075322, "checkpoint_dir_name": null, "done": false, "training_iteration": 9, "trial_id": "284c96d2", "date": "2026-02-26_11-08-42", "time_this_iter_s": 43.675320863723755, "time_total_s": 317.46528601646423, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": 
"kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 317.46528601646423, "iterations_since_restore": 9} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 646.78125, "rollout/std_q1_length": 177.5372314453125, "rollout/gen/avg_score": -881.8265991210938, "rollout/gen/std_score": 1682.109619140625, "rollout/gen/avg_r1_length": 109.0, "rollout/gen/std_r1_length": 39.974185943603516, "rollout/gen/avg_r1_score": -178.44183349609375, "rollout/gen/std_r1_score": 86.26277160644531, "rollout/gen/avg_r1_accuracy": 0.1457708477973938, "rollout/gen/std_r1_accuracy": 0.026056993752717972, "rollout/gen/avg_r2_length": 585.4609375, "rollout/gen/std_r2_length": 311.2644348144531, "rollout/gen/avg_r2_score": -1057.6727294921875, "rollout/gen/std_r2_score": 1839.7806396484375, "rollout/gen/avg_r2_accuracy": 0.11130931973457336, "rollout/gen/std_r2_accuracy": 0.04489454627037048, "rollout/best_game/query_1": "By . Lillian Radulova . An Australian athlete suffered head injuries after a a drone which had been filming the race plummeted to the ground. Raija Ogden was only metres from the finish line in the Endure Batavia Triathlon in Geraldton, Western Australia, on Sunday, when the incident happened. She claims the drone hit her on the head and that an ambulance crew later found a piece of propeller embedded in her skull. She was taken to hospital where she needed three stitches. The drone owner and operator Warren Abrams said Mrs Ogden was hurt when she fell over after being startled by the piece of equipment as it fell. The Civil Aviation Safety Authority (CASA) is investigating. Hit by drone? Paramedics treat Raija Ogden after the crash at the Endure Batavia Triathlon in Geraldton, Western Australia . 'My hair was completely red with blood.' Mrs Ogden told The Western Australian. 'I didn't hit the ground. I sat down because I just thought I was going to pass out.' She vehemently denied that the injuries were caused when she fell over. 
Warren Abrams, the pilot and drone owner from New Era Photography and Film, admitted that he had lost control of the drone before it fell 10 metres to the ground. However, he claimed that this was because it had been hacked and that he had also lost control in an earlier test flight when someone 'channel hopped' and took it over. Mr Abrams believes any of the spectators or competitors could have easily used a smartphone to hack into the drones operating system. He also claimed that footage from the flying camera would show that the device missed Ms Ogden and fell just behind her. A number of witnesses have expressed varying views, supporting both versions of the event. Accident: Mrs Ogden (right, pictured with her husband Courtney) was just metres away from the finish line when the drone crashed . Taken to hospital: Mrs Ogden needed three stitches and said that the ambulance crew had removed a piece of propeller from her head . CASA is looking into the incident concerning the regulations for operating the drones, which dictate the minimum distance that unmanned aircraft must be flown is 30 metres away from people at all times. The regulations relating to commercial unmanned aircraft clearly state that they 'require UAV controller\u00e2\u20ac\u2122s certification and an operator\u00e2\u20ac\u2122s certificate to fly.' Allegations have also been raised over whether Mr Abrams or his company are certified to fly the cinematographic drones, with neither seeming to appear on the list of 92 nationally certified operators. Drones, or unmanned aircraft , are best known for their military uses. However, In recent years the technology is being utilised by photography groups, agricultural industries, advertising companies and in some cases for surveying purposes, according to CASA's list of operator certificate holders. Hobbyists are also able to access devices, with affordable consumer options available on the market. 
Unmanned aerial vehicle: A drone equipped with a camera device similar to the device which was being used by Warren Abrams .\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0The seats of the drone are tight giving the illusion of flying just in case and who knew that this would grant the crew of the ill-fated Golden Globes an advantage?\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \"Andre did not completely resign in December 1989 but the result has been a sporadic flurry of noise during his regime \"drumming adventure\" or simply snickering. It has all started from there. He reigned on some lofty promises in the effort to promote reeducation but forced himself to change course on what he knew to be the sole target of the terror network and nadir. . . that picture star \"guldaimn\" would be gone by the end of 1991, willing to re-enter but not after Venezuela's skepticism allowed him to go into hiding. \"Let me note that he promised himself 8 months of reeducation but then Abu Dladur was left free all the same. This leading to only 2.4 months of stay memories, nothing of the sort whatsoever for many more?\"\nThe war began in October 1991 and descended into malaise and internal conflict until that point. . . thanks even more for sharing this wonderful Russian spy story . . law of drama\nAstronomy Buster on page 34, 2004", "rollout/best_game/whole_game": "ROLE 0: By . Lillian Radulova . An Australian athlete suffered head injuries after a a drone which had been filming the race plummeted to the ground. Raija Ogden was only metres from the finish line in the Endure Batavia Triathlon in Geraldton, Western Australia, on Sunday, when the incident happened. She claims the drone hit her on the head and that an ambulance crew later found a piece of propeller embedded in her skull. She was taken to hospital where she needed three stitches. 
The drone owner and operator Warren Abrams said Mrs Ogden was hurt when she fell over after being startled by the piece of equipment as it fell. The Civil Aviation Safety Authority (CASA) is investigating. Hit by drone? Paramedics treat Raija Ogden after the crash at the Endure Batavia Triathlon in Geraldton, Western Australia . 'My hair was completely red with blood.' Mrs Ogden told The Western Australian. 'I didn't hit the ground. I sat down because I just thought I was going to pass out.' She vehemently denied that the injuries were caused when she fell over. Warren Abrams, the pilot and drone owner from New Era Photography and Film, admitted that he had lost control of the drone before it fell 10 metres to the ground. However, he claimed that this was because it had been hacked and that he had also lost control in an earlier test flight when someone 'channel hopped' and took it over. Mr Abrams believes any of the spectators or competitors could have easily used a smartphone to hack into the drones operating system. He also claimed that footage from the flying camera would show that the device missed Ms Ogden and fell just behind her. A number of witnesses have expressed varying views, supporting both versions of the event. Accident: Mrs Ogden (right, pictured with her husband Courtney) was just metres away from the finish line when the drone crashed . Taken to hospital: Mrs Ogden needed three stitches and said that the ambulance crew had removed a piece of propeller from her head . CASA is looking into the incident concerning the regulations for operating the drones, which dictate the minimum distance that unmanned aircraft must be flown is 30 metres away from people at all times. The regulations relating to commercial unmanned aircraft clearly state that they 'require UAV controller\u00e2\u20ac\u2122s certification and an operator\u00e2\u20ac\u2122s certificate to fly.' 
Allegations have also been raised over whether Mr Abrams or his company are certified to fly the cinematographic drones, with neither seeming to appear on the list of 92 nationally certified operators. Drones, or unmanned aircraft , are best known for their military uses. However, In recent years the technology is being utilised by photography groups, agricultural industries, advertising companies and in some cases for surveying purposes, according to CASA's list of operator certificate holders. Hobbyists are also able to access devices, with affordable consumer options available on the market. Unmanned aerial vehicle: A drone equipped with a camera device similar to the device which was being used by Warren Abrams .\nTL;DR: \nROLE 1: Given the text: \u00a0The seats of the drone are tight giving the illusion of flying just in case and who knew that this would grant the crew of the ill-fated Golden Globes an advantage?\nReconstruct the summarized text to the detailed:\nROLE 2: \"Andre did not completely resign in December 1989 but the result has been a sporadic flurry of noise during his regime \"drumming adventure\" or simply snickering. It has all started from there. He reigned on some lofty promises in the effort to promote reeducation but forced himself to change course on what he knew to be the sole target of the terror network and nadir. . . that picture star \"guldaimn\" would be gone by the end of 1991, willing to re-enter but not after Venezuela's skepticism allowed him to go into hiding. \"Let me note that he promised himself 8 months of reeducation but then Abu Dladur was left free all the same. This leading to only 2.4 months of stay memories, nothing of the sort whatsoever for many more?\"\nThe war began in October 1991 and descended into malaise and internal conflict until that point. . . thanks even more for sharing this wonderful Russian spy story . . 
law of drama\nAstronomy Buster on page 34, 2004", "rollout/best_game/overall_score": -252.35715545467437, "rollout/best_game/accuracy (r2)": 0.09468918816099825, "_this_batch_num_rl_token": 20697, "num_rl_rollout": 8, "lm_epoch": 0, "rl_epoch": 0, "step": 140, "total_data_token": 182074, "total_rl_token": 608688, "total_lm_token": 0, "total_token": 608688, "completed_steps": 140, "rollout/num_train_sample": 160, "timestamp": 1772075358, "checkpoint_dir_name": null, "done": false, "training_iteration": 10, "trial_id": "284c96d2", "date": "2026-02-26_11-09-18", "time_this_iter_s": 35.20860838890076, "time_total_s": 352.673894405365, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, 
"min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 352.673894405365, "iterations_since_restore": 10} | |
| {"rl_info/A2G": -2.559880256652832, "rl_info/entropy": 3.0525901317596436, "rl_info/total_token": 4511.0, "rl_info/advantage_b4_norm": -773.5045166015625, "rl_info/kl_w_ref": 0.0, "train/rl_loss": 255.9877166748047, "train/total_loss": 255.9877166748047, "num_rl_rollout": 8, "lm_epoch": 0, "rl_epoch": 0, "step": 141, "total_data_token": 182074, "total_rl_token": 613199, "total_lm_token": 0, "total_token": 613199, "completed_steps": 141, "timestamp": 1772075358, "checkpoint_dir_name": null, "done": false, "training_iteration": 11, "trial_id": "284c96d2", "date": "2026-02-26_11-09-18", "time_this_iter_s": 0.7599008083343506, "time_total_s": 353.43379521369934, "pid": 208365, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, 
"sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 353.43379521369934, "iterations_since_restore": 11} | |
| {"cnndm/rouge1": 0.20295332203327696, "cnndm/rouge2": 0.06270234512909481, "cnndm/rougeL": 0.14877834636947918, "cnndm/rougeLsum": 0.16471819101882157, "cnndm/bertscore_precision": 0.7096506178379058, "cnndm/bertscore_recall": 0.7654967844486237, "cnndm/bertscore_f1": 0.7362120985984802, "eval_agg/avg_all_rougef": 0.14478805113766813, "eval_agg/avg_all_bertf": 0.7362120985984802, "eval_agg/avg_all": 0.4405000748680742, "num_rl_rollout": 5, "lm_epoch": 0, "rl_epoch": 0, "step": 100, "total_data_token": 113240, "total_rl_token": 424093, "total_lm_token": 0, "total_token": 424093, "completed_steps": 100, "tune_objective": 0.5124110369410866, "timestamp": 1772076496, "checkpoint_dir_name": null, "done": false, "training_iteration": 8, "trial_id": "284c96d2", "date": "2026-02-26_11-28-16", "time_this_iter_s": 50.257532835006714, "time_total_s": 297.09654927253723, "pid": 222837, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", 
"rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 50.257532835006714, "iterations_since_restore": 1} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 703.0625, "rollout/std_q1_length": 178.38973999023438, "rollout/gen/avg_score": -792.5718994140625, "rollout/gen/std_score": 1101.183837890625, "rollout/gen/avg_r1_length": 125.0, "rollout/gen/std_r1_length": 26.526918411254883, "rollout/gen/avg_r1_score": -213.2135009765625, "rollout/gen/std_r1_score": 61.63120651245117, "rollout/gen/avg_r1_accuracy": 0.14311319589614868, "rollout/gen/std_r1_accuracy": 0.02128567174077034, "rollout/gen/avg_r2_length": 676.296875, "rollout/gen/std_r2_length": 283.9310607910156, "rollout/gen/avg_r2_score": -937.4114379882812, "rollout/gen/std_r2_score": 1188.065185546875, "rollout/gen/avg_r2_accuracy": 0.1147570013999939, "rollout/gen/std_r2_accuracy": 0.039336565881967545, "rollout/best_game/query_1": "By . David Kent . Hull City will reignite their interest in Sheffield United defender Harry Maguire despite the Blades\u2019 claims they have already accepted an offer. In a bizarre chain of events, the Sky Bet League One club released a statement saying they had accepted an offer from their Yorkshire rivals, only for Hull to claim they had withdrawn interest after their final bid was rejected. A club statement from Hull said: \u2018Sheffield United rejected an offer from the Club five days (Wednesday) ago for defender Harry Maguire. This was the Club\u2019s final offer and we have since entered into discussions with other transfer targets. Wanted: Sheffield United's Harry Maguire could still move to Hull despite the strange circumstances . Rock solid: Sheffield United claimed to have accepted a bid but Hull then denied that . \u2018In light . of Sheffield United\u2019s statement this morning the board will meet with . manager Steve Bruce this afternoon to determine whether we now wish to . pursue the transfer after the South Yorkshire club\u2019s apparent U-turn.\u2019 Earlier on . Monday morning The Blades\u2019 managing director Mal Brannigan told the . 
club\u2019s website: \u2018We made it clear that we wanted Harry to stay by . offering him a new contract at the end of last season, unfortunately he . and his representatives chose a different path. \u2018However, . as he is under contract at Bramall Lane, we exercised our right to set . our own valuation of him and after numerous bids which did not meet our . expectations, Hull City increased their offer and we have reluctantly . accepted. \u2018As we . have previously stated, Sheffield United are not under pressure to sell . our players but on this occasion it was deemed the best for all parties . and now we move on with the aim of assisting Nigel Clough to strengthen . his squad.\u2019 Wanted: Steve Bruce will consider making another offer for Blades captain Maguire .\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Birmingham City \u00a0and \u00a0Wen player Fulham captain Charlton Leroux block the door on top off top action after Hull have sealed a surprise record offering of \u00a377million to Ferdinand. Read more here. The 6 Results of this piece were declared as official last night. Qualify.Submission Pressure - wat:WHBC_LCCC\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \"Boswell has taught Hull Fans more finely than anyone else exactly why it is that Yeats are so poor buys the title and keeping it until the end.\nThis marks the penultimate instance where Hull Fan groups bet patience tho\"\nIt's a Overbroad mocking disappointed you Harry the promising second downes barely got made with liberation football.", "rollout/best_game/whole_game": "ROLE 0: By . David Kent . Hull City will reignite their interest in Sheffield United defender Harry Maguire despite the Blades\u2019 claims they have already accepted an offer. 
In a bizarre chain of events, the Sky Bet League One club released a statement saying they had accepted an offer from their Yorkshire rivals, only for Hull to claim they had withdrawn interest after their final bid was rejected. A club statement from Hull said: \u2018Sheffield United rejected an offer from the Club five days (Wednesday) ago for defender Harry Maguire. This was the Club\u2019s final offer and we have since entered into discussions with other transfer targets. Wanted: Sheffield United's Harry Maguire could still move to Hull despite the strange circumstances . Rock solid: Sheffield United claimed to have accepted a bid but Hull then denied that . \u2018In light . of Sheffield United\u2019s statement this morning the board will meet with . manager Steve Bruce this afternoon to determine whether we now wish to . pursue the transfer after the South Yorkshire club\u2019s apparent U-turn.\u2019 Earlier on . Monday morning The Blades\u2019 managing director Mal Brannigan told the . club\u2019s website: \u2018We made it clear that we wanted Harry to stay by . offering him a new contract at the end of last season, unfortunately he . and his representatives chose a different path. \u2018However, . as he is under contract at Bramall Lane, we exercised our right to set . our own valuation of him and after numerous bids which did not meet our . expectations, Hull City increased their offer and we have reluctantly . accepted. \u2018As we . have previously stated, Sheffield United are not under pressure to sell . our players but on this occasion it was deemed the best for all parties . and now we move on with the aim of assisting Nigel Clough to strengthen . 
his squad.\u2019 Wanted: Steve Bruce will consider making another offer for Blades captain Maguire .\nTL;DR: \nROLE 1: Given the text: \u00a0Birmingham City \u00a0and \u00a0Wen player Fulham captain Charlton Leroux block the door on top off top action after Hull have sealed a surprise record offering of \u00a377million to Ferdinand. Read more here. The 6 Results of this piece were declared as official last night. Qualify.Submission Pressure - wat:WHBC_LCCC\nReconstruct the summarized text to the detailed:\nROLE 2: \"Boswell has taught Hull Fans more finely than anyone else exactly why it is that Yeats are so poor buys the title and keeping it until the end.\nThis marks the penultimate instance where Hull Fan groups bet patience tho\"\nIt's a Overbroad mocking disappointed you Harry the promising second downes barely got made with liberation football.", "rollout/best_game/overall_score": -148.89019740518017, "rollout/best_game/accuracy (r2)": 0.055565242662016866, "_this_batch_num_rl_token": 22498, "num_rl_rollout": 6, "lm_epoch": 0, "rl_epoch": 0, "step": 100, "total_data_token": 135738, "total_rl_token": 424093, "total_lm_token": 0, "total_token": 424093, "completed_steps": 100, "rollout/num_train_sample": 160, "timestamp": 1772076523, "checkpoint_dir_name": null, "done": false, "training_iteration": 9, "trial_id": "284c96d2", "date": "2026-02-26_11-28-43", "time_this_iter_s": 27.017394304275513, "time_total_s": 324.11394357681274, "pid": 222837, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": 
"/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", 
"eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 77.27492713928223, "iterations_since_restore": 2} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 644.6875, "rollout/std_q1_length": 200.9344940185547, "rollout/gen/avg_score": -847.5543212890625, "rollout/gen/std_score": 1512.508544921875, "rollout/gen/avg_r1_length": 114.4375, "rollout/gen/std_r1_length": 35.75177764892578, "rollout/gen/avg_r1_score": -190.2052001953125, "rollout/gen/std_r1_score": 80.04623413085938, "rollout/gen/avg_r1_accuracy": 0.13981282711029053, "rollout/gen/std_r1_accuracy": 0.025504738092422485, "rollout/gen/avg_r2_length": 610.40625, "rollout/gen/std_r2_length": 301.576171875, "rollout/gen/avg_r2_score": -1011.8916015625, "rollout/gen/std_r2_score": 1651.1949462890625, "rollout/gen/avg_r2_accuracy": 0.1099642887711525, "rollout/gen/std_r2_accuracy": 0.040685106068849564, "rollout/best_game/query_1": "A bizarre Russian Roulette-style game in which players take turns drawing cards from a deck of cards featuring exploding kittens, weaponized enchiladas and back hair has received almost $2million in funding. The game, a brainchild of Matthew Inman, the creator of the cult comic Oatmeal, gathered the funding via its Kickstarter page within only a few hours after being launched. Originally called Bomb Squad, it wasn't until Mr Inman became involved that the game truly found its quirky appeal. The Exploding Kittens card game has proved hugely popular, with players claiming its Russian Roulette-style suspense is extremely addictive . The Exploding Kittens Kickstarter page (pictured) shows it has almost received $2million in funding within days of being launched . As the creators' explain: 'Exploding Kittens is a highly strategic kitty-powered version of Russian Roulette. Players take turns drawing cards until someone draws an exploding kitten and loses the game. 'The deck is made up of cards that let you avoid exploding by peeking at cards before you draw, forcing your opponent to draw multiple cards, or shuffling the deck. 
'So if you're into card games or laser beams or weaponized enchiladas, please help us make this game a reality.' The Kickstarter, launched with the modest goal of attracting $10,000, surpassed even its creators' wildest expectations when it hit $1,734,000. Created by Mr Inman, Elan Lee and Shane Small, the game's appeal lies in its suspense as it edges closer to completion and the chances of drawing an exploding kitten card increase. Mr Inman told\u00a0The Washington Post: 'I've wanted to make a card game for a long time, I just don\u2019t know how to make one with great mechanics and replay value, and Elan and Shane created something really fantastic. 'I suggested that we change the name to Exploding Kittens, and add cards like unicorn pigs and Sasquatches and weaponized back-hair and a bunch of other weird Oatmeal-inspired attack cards. 'From there, our little game was born.' It's not the first time Mr Inman has created a stir among fans for his outrageous ideas. In 2012 after being served with papers for a $20,000 lawsuit filed by rival website 'FunnyJunk', he decided to ignore the legal wheels and produce his own response. After being served with a $20,000 lawsuit in 2012, Exploding Kittens creator Matthew Inman raised the money, photographed himself with it (pictured) and gave it to charity rather than pay the claimaints . He said: 'I'm going to try and raise $20,000 in donations. I'm going to take a photo of the raised money. 'I'm going to mail you that photo, along with this drawing of your mom seducing a Kodiak bear. I'm going to take the money and donate one half to the National Wildlife Federation and the other half to the American Cancer Society.' He later made good on his claim, posting a selfie of himself next to a stack of $20,000 in cash. 
Mr Inman's cartoons have made him extremely popular across the internet, with examples of his work such as '10 Words You Need To stop Misspelling' and 'What It's Like To Own An Apple Product' being shared widely on social media such as Facebook and Twitter.\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Exploding Kittens is a fan recreation of an abandoned felt chicken cartoon That's Facebook right there Schlubs needs no advertisement What went wrong? Like Mr Inman | who makes Muni (Case after Macly Murray at Todamus, London) || Leave a comment\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " Return to the main map which follows by clicking Save.", "rollout/best_game/whole_game": "ROLE 0: A bizarre Russian Roulette-style game in which players take turns drawing cards from a deck of cards featuring exploding kittens, weaponized enchiladas and back hair has received almost $2million in funding. The game, a brainchild of Matthew Inman, the creator of the cult comic Oatmeal, gathered the funding via its Kickstarter page within only a few hours after being launched. Originally called Bomb Squad, it wasn't until Mr Inman became involved that the game truly found its quirky appeal. The Exploding Kittens card game has proved hugely popular, with players claiming its Russian Roulette-style suspense is extremely addictive . The Exploding Kittens Kickstarter page (pictured) shows it has almost received $2million in funding within days of being launched . As the creators' explain: 'Exploding Kittens is a highly strategic kitty-powered version of Russian Roulette. Players take turns drawing cards until someone draws an exploding kitten and loses the game. 'The deck is made up of cards that let you avoid exploding by peeking at cards before you draw, forcing your opponent to draw multiple cards, or shuffling the deck. 
'So if you're into card games or laser beams or weaponized enchiladas, please help us make this game a reality.' The Kickstarter, launched with the modest goal of attracting $10,000, surpassed even its creators' wildest expectations when it hit $1,734,000. Created by Mr Inman, Elan Lee and Shane Small, the game's appeal lies in its suspense as it edges closer to completion and the chances of drawing an exploding kitten card increase. Mr Inman told\u00a0The Washington Post: 'I've wanted to make a card game for a long time, I just don\u2019t know how to make one with great mechanics and replay value, and Elan and Shane created something really fantastic. 'I suggested that we change the name to Exploding Kittens, and add cards like unicorn pigs and Sasquatches and weaponized back-hair and a bunch of other weird Oatmeal-inspired attack cards. 'From there, our little game was born.' It's not the first time Mr Inman has created a stir among fans for his outrageous ideas. In 2012 after being served with papers for a $20,000 lawsuit filed by rival website 'FunnyJunk', he decided to ignore the legal wheels and produce his own response. After being served with a $20,000 lawsuit in 2012, Exploding Kittens creator Matthew Inman raised the money, photographed himself with it (pictured) and gave it to charity rather than pay the claimaints . He said: 'I'm going to try and raise $20,000 in donations. I'm going to take a photo of the raised money. 'I'm going to mail you that photo, along with this drawing of your mom seducing a Kodiak bear. I'm going to take the money and donate one half to the National Wildlife Federation and the other half to the American Cancer Society.' He later made good on his claim, posting a selfie of himself next to a stack of $20,000 in cash. 
Mr Inman's cartoons have made him extremely popular across the internet, with examples of his work such as '10 Words You Need To stop Misspelling' and 'What It's Like To Own An Apple Product' being shared widely on social media such as Facebook and Twitter.\nTL;DR: \nROLE 1: Given the text: \u00a0Exploding Kittens is a fan recreation of an abandoned felt chicken cartoon That's Facebook right there Schlubs needs no advertisement What went wrong? Like Mr Inman | who makes Muni (Case after Macly Murray at Todamus, London) || Leave a comment\nReconstruct the summarized text to the detailed:\nROLE 2: Return to the main map which follows by clicking Save.", "rollout/best_game/overall_score": -73.66063307698064, "rollout/best_game/accuracy (r2)": 0.00946047129889032, "_this_batch_num_rl_token": 20630, "num_rl_rollout": 7, "lm_epoch": 0, "rl_epoch": 0, "step": 120, "total_data_token": 156368, "total_rl_token": 520798, "total_lm_token": 0, "total_token": 520798, "completed_steps": 120, "rollout/num_train_sample": 160, "timestamp": 1772076562, "checkpoint_dir_name": null, "done": false, "training_iteration": 10, "trial_id": "284c96d2", "date": "2026-02-26_11-29-22", "time_this_iter_s": 38.773863792419434, "time_total_s": 362.8878073692322, "pid": 222837, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, 
"per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", 
"cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 116.04879093170166, "iterations_since_restore": 3} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 698.625, "rollout/std_q1_length": 203.71434020996094, "rollout/gen/avg_score": -826.3539428710938, "rollout/gen/std_score": 1516.7906494140625, "rollout/gen/avg_r1_length": 118.75, "rollout/gen/std_r1_length": 30.32512092590332, "rollout/gen/avg_r1_score": -197.74517822265625, "rollout/gen/std_r1_score": 73.34259033203125, "rollout/gen/avg_r1_accuracy": 0.13280615210533142, "rollout/gen/std_r1_accuracy": 0.023484129458665848, "rollout/gen/avg_r2_length": 580.46875, "rollout/gen/std_r2_length": 315.8414611816406, "rollout/gen/avg_r2_score": -983.5060424804688, "rollout/gen/std_r2_score": 1659.691650390625, "rollout/gen/avg_r2_accuracy": 0.10073377192020416, "rollout/gen/std_r2_accuracy": 0.042013950645923615, "rollout/best_game/query_1": "Goals from Samir Nasri and Pablo Zabaleta ensured Manchester City beat Roma and sealed progression to the knockout stages of this season's Champions League. Results were in their favour on Wednesday night and, along with Chelsea and Arsenal, they are in the pot for the last 16 draw. Here, Sportsmail looks at City's group stage story, and how they came back from the brink... BAYERN MUNICH 1-0 MAN CITY . September 17 . City get off to the worst possible start after Jerome Boateng\u2019s last-gasp winner in Munich. To top it off, Roma have beaten CSKA Moscow 5-1, and things look bleak for Manuel Pellegrini's men. Joe Hart rocks back on his heels in the post-match interview, after a man-of-the-match performance. City are still confident despite two poor results on the opening night, he says. Manchester City goalkeeper Joe Hart stretches but is powerless to keep out Bayern Munich's late winner at the Allianz Arena . Bayern Munich centre back Jerome Boateng, formerly of City, punches the air having raced towards the touchline to celebrate his goal . MAN CITY 1-1 ROMA . September 30 . 
Sergio Aguero gives City the lead from the penalty spot, but a record-breaking Francesco Totti goal rescues a point for Roma. If City are to progress, these are the games they need to win. At home to their main rivals for the second qualification spot. The Etihad crowd seem to have given up hope too - the attendance is 10,000 under capacity. Sportsmail's Martin Samuel summed up the performance: . 'Roma probably shaded it here, and certainly Joe Hart was the busier goalkeeper. The hero in defeat against Bayern Munich, he saved City once again, and was unfortunate that a slip at a vital moment prevented him closing out what proved to be the equaliser.' Manuel Pellegrini's Manchester City took an early fourth-minute lead, but could not hold on for three vital points against Roma . Francesco Totti scores Roma's equalising goal against Manchester City, becoming the Champions League's oldest scorer in the process . CSKA MOSCOW 2-2 MAN CITY . October 21 . The visitors coast to a 2-0 half-time lead but throw away a vital victory. Sergio Aguero and James Milner are on the scoresheet as the first 45 minutes end, and things are looking rosy from a City perspective. Yes, they had one point from their first two matches, but a positive away win will do them the world of good. A disastrous second-half performance follows though, as Seydou Doumbia and\u00a0Bebras Natcho steal a point for the Russians. Sportsmail's Ian Ladyman reports from the game: . 'Freezing temperatures, a stadium echoing to the boorish, boozed-up chanting of CSKA fans who shouldn\u2019t have been anywhere near the place and a late penalty decision so dreadful it\u2019s tempting to check the Hungarian referee\u2019s passport for Russian lineage.' \u00a0It wasn't City's night. Manchester City players gather to congratulate goalscorer James Milner while CSKA Moscow goalkeeper Igor Akinfeev looks down . 
Hart dives to his left but is unable to keep out Bibras Natcho's late penalty after Aleksandar Kolarov brought down Doumbia in the area . MAN CITY 1-2 CSKA MOSCOW . November 5 . Things go from bad to worse for City. Four games, two points. Both Fernandinho and Yaya Toure see red, and a double from Seydou Doumbia secures an unlikely away win for the Russian side. Sportsmail's Martin Samuel was at the game: . 'Even by the dismal standards of Manchester City in the Champions League this was a low. Defeat at home to what had been considered the weakest team in Group E, reduced to nine players by the end, never ahead at any stage of the match, trailing after two minutes, Toure banned for the rest of the campaign, this was close to as bad as it gets.' City captain Vincent Kompany has", "rollout/best_game/query_2": "Given the text: to find a temper tantrum after stretchelling in to join centre back Ramires. Swansea City sporting director Neil Warnock suggests a suspension weelet: . 64 minutes later.\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": "\nWide player sorrowful. sensible vessels to aggravate your Indian problems [AGL video]\nAnd there it would triumph:\n1) milk, stock, the milk, milk, these are courage swords which you will yield. no 1862 etc. http://limitandreach.blogspot.co.uk/ This could serve as a follow-up to this test wherein Annie Lyons' Marilyn Mov Hanbij \". The shot knocked John last weekend. i was punctured as John woke me up where I was running from Jan when Harry died on the operating table of User Apologies , on April 09.", "rollout/best_game/whole_game": "ROLE 0: Goals from Samir Nasri and Pablo Zabaleta ensured Manchester City beat Roma and sealed progression to the knockout stages of this season's Champions League. Results were in their favour on Wednesday night and, along with Chelsea and Arsenal, they are in the pot for the last 16 draw. 
Here, Sportsmail looks at City's group stage story, and how they came back from the brink... BAYERN MUNICH 1-0 MAN CITY . September 17 . City get off to the worst possible start after Jerome Boateng\u2019s last-gasp winner in Munich. To top it off, Roma have beaten CSKA Moscow 5-1, and things look bleak for Manuel Pellegrini's men. Joe Hart rocks back on his heels in the post-match interview, after a man-of-the-match performance. City are still confident despite two poor results on the opening night, he says. Manchester City goalkeeper Joe Hart stretches but is powerless to keep out Bayern Munich's late winner at the Allianz Arena . Bayern Munich centre back Jerome Boateng, formerly of City, punches the air having raced towards the touchline to celebrate his goal . MAN CITY 1-1 ROMA . September 30 . Sergio Aguero gives City the lead from the penalty spot, but a record-breaking Francesco Totti goal rescues a point for Roma. If City are to progress, these are the games they need to win. At home to their main rivals for the second qualification spot. The Etihad crowd seem to have given up hope too - the attendance is 10,000 under capacity. Sportsmail's Martin Samuel summed up the performance: . 'Roma probably shaded it here, and certainly Joe Hart was the busier goalkeeper. The hero in defeat against Bayern Munich, he saved City once again, and was unfortunate that a slip at a vital moment prevented him closing out what proved to be the equaliser.' Manuel Pellegrini's Manchester City took an early fourth-minute lead, but could not hold on for three vital points against Roma . Francesco Totti scores Roma's equalising goal against Manchester City, becoming the Champions League's oldest scorer in the process . CSKA MOSCOW 2-2 MAN CITY . October 21 . The visitors coast to a 2-0 half-time lead but throw away a vital victory. Sergio Aguero and James Milner are on the scoresheet as the first 45 minutes end, and things are looking rosy from a City perspective. 
Yes, they had one point from their first two matches, but a positive away win will do them the world of good. A disastrous second-half performance follows though, as Seydou Doumbia and\u00a0Bebras Natcho steal a point for the Russians. Sportsmail's Ian Ladyman reports from the game: . 'Freezing temperatures, a stadium echoing to the boorish, boozed-up chanting of CSKA fans who shouldn\u2019t have been anywhere near the place and a late penalty decision so dreadful it\u2019s tempting to check the Hungarian referee\u2019s passport for Russian lineage.' \u00a0It wasn't City's night. Manchester City players gather to congratulate goalscorer James Milner while CSKA Moscow goalkeeper Igor Akinfeev looks down . Hart dives to his left but is unable to keep out Bibras Natcho's late penalty after Aleksandar Kolarov brought down Doumbia in the area . MAN CITY 1-2 CSKA MOSCOW . November 5 . Things go from bad to worse for City. Four games, two points. Both Fernandinho and Yaya Toure see red, and a double from Seydou Doumbia secures an unlikely away win for the Russian side. Sportsmail's Martin Samuel was at the game: . 'Even by the dismal standards of Manchester City in the Champions League this was a low. Defeat at home to what had been considered the weakest team in Group E, reduced to nine players by the end, never ahead at any stage of the match, trailing after two minutes, Toure banned for the rest of the campaign, this was close to as bad as it gets.' City captain Vincent Kompany has\nROLE 1: Given the text: to find a temper tantrum after stretchelling in to join centre back Ramires. Swansea City sporting director Neil Warnock suggests a suspension weelet: . 64 minutes later.\nReconstruct the summarized text to the detailed:\nROLE 2: \nWide player sorrowful. sensible vessels to aggravate your Indian problems [AGL video]\nAnd there it would triumph:\n1) milk, stock, the milk, milk, these are courage swords which you will yield. no 1862 etc. 
http://limitandreach.blogspot.co.uk/ This could serve as a follow-up to this test wherein Annie Lyons' Marilyn Mov Hanbij \". The shot knocked John last weekend. i was punctured as John woke me up where I was running from Jan when Harry died on the operating table of User Apologies , on April 09.", "rollout/best_game/overall_score": -163.42129571345663, "rollout/best_game/accuracy (r2)": 0.032670076445031074, "_this_batch_num_rl_token": 22356, "num_rl_rollout": 8, "lm_epoch": 0, "rl_epoch": 0, "step": 140, "total_data_token": 178724, "total_rl_token": 612344, "total_lm_token": 0, "total_token": 612344, "completed_steps": 140, "rollout/num_train_sample": 160, "timestamp": 1772076600, "checkpoint_dir_name": null, "done": false, "training_iteration": 11, "trial_id": "284c96d2", "date": "2026-02-26_11-30-00", "time_this_iter_s": 38.375166177749634, "time_total_s": 401.2629735469818, "pid": 222837, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, 
"rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 154.4239571094513, "iterations_since_restore": 4} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 692.34375, "rollout/std_q1_length": 183.80364990234375, "rollout/gen/avg_score": -957.0548706054688, "rollout/gen/std_score": 1824.4251708984375, "rollout/gen/avg_r1_length": 124.125, "rollout/gen/std_r1_length": 25.98231315612793, "rollout/gen/avg_r1_score": -209.20632934570312, "rollout/gen/std_r1_score": 63.76570510864258, "rollout/gen/avg_r1_accuracy": 0.14358803629875183, "rollout/gen/std_r1_accuracy": 0.02135639637708664, "rollout/gen/avg_r2_length": 589.4453125, "rollout/gen/std_r2_length": 312.4082946777344, "rollout/gen/avg_r2_score": -1144.0169677734375, "rollout/gen/std_r2_score": 1997.515625, "rollout/gen/avg_r2_accuracy": 0.10961095988750458, "rollout/gen/std_r2_accuracy": 0.043547771871089935, "rollout/best_game/query_1": "White House Press Secretary Jay Carney denied on Thursday a television news reporter's day-old claim that reporters often 'provide the questions to him in advance,' before his daily briefings, and that he sometimes provides answers on paper before taking the podium. WIthin hours, the Phoenix reporter at the center of a quick-drying controversy admitted she got the whole thing wrong. Phoenix news anchor Catherine Anaya reported Wednesday night on KPHO-TV5 that in an 'off the record' meeting, Carney had told a handful of local TV reporters that White House correspondents often tell him before daily briefings what they'll be asking. 'If only this were true,' Carney told MailOnline Thursday morning. By mid-afternoon Anaya, fresh of a Washington-to-Phoenix flight, walked parts of her story back in an email to MailOnline. But she insisted that she herself was asked to submit a question in advance for Carney on Wednesday afternoon. SCROLL DOWN FOR VIDEO . KPHO-TV anchor Catherine Anaya went to Washington to participate in a carefully managed television journalism cattle-call at the White House, and reported that Press Secretary Jay Carney's daily briefings are often just for show . 
Carney denied getting a heads-up about daily briefing questions: 'If only this were true' 'As a local journalist I had no issue providing my proposed question in advance,' she told MailOnline, 'because I wanted to make sure it was an appropriate q[uestion] for a national briefing and I wanted to make sure it was appropriate for Mr. Carney.' '[B]ut in discussing it with a staff member the night before, we decided I would save it for the president. I was attempting to not waste national time on a local question, but in my attempt at explaining that I unintentionally made it sound like that experience applied to everyone.' 'That is my mistake,' Anaya added, 'and I own up to it.' Meanwhile, KPHO-TV issued a separate statement attributed to Anaya, but the CBS affiliate station quickly deleted it from its website. Assignment Editor Scott Davis told MailOnline that it 'apparently ... was not the correct statement.' Anaya's on-air commentary remains on the website, however. 'We started here shortly after 8 o'clock with a coffee with Press Secretary Jay Carney inside his office in the West Wing,' she said on the air, before making a stunning breach of journalistic protocol by reporting on an 'off the record' meeting and airing a photo of it. KPHO-TV . issued this extended quote Thursday afternoon from Catherine Anaya, and . then quickly withdrew it, saying in an email that 'apparently that was . not the correct statement': . 'It seems much had been inferred about my observations following my White House visit yesterday. 'First, . I did not take notes during our coffee with Jay Carney because it was . off the record. But when I referenced the meeting in my live reports I . did say that it was a great opportunity to talk about the challenges of . his day and how he has to be so well-versed on many topics each day. 'In . my live report I also wanted to share my impression of my experience in . getting a question answered during the briefing. I was indeed asked to . 
provide my question in advance. Because my question was largely of local . interest, I chose to save it for my interview with the President . instead. 'My mistake was to . lump that experience with my coffee meeting reference, inadvertently . giving Mr. Carney credit for that when in fact it did not come from him. I regret giving anyone the impression that it was from conversation I . had with Mr. Carney. I do not attend those briefings regularly and cannot speak directly to the process for non-visiting journalists. None of my observations stemmed from my off-the-record meeting with Jay Carney.' 'And this was off-the-record,' she . reported, 'so we were able to ask him all about some of the preparation . that he does on a regular basis for talking to the press in his daily . press briefings. He showed us a very long list of items that he has to . be well-versed on every single", "rollout/best_game/query_2": "Given the text: document that he has to present.' On Author's statement: _____________________ The publication places large shares in fact and style in the Washington Post on the subject of federal rules of the daily\u2026and sixty percent nationally. Based on recent eminent jurisprudence searching on Capitol Hill \u2013 except surely George Washington \u2013 what departed in my public way from this work? Brom is Niels Grund v. New York City Circuit Court \u2013 (NELC, N.Y. June 26, 2006), 341 U.S. 1211; Anderson v. Los Angeles CASCADE, 15 Pet. 219, 132 P.2d 980, 800 (2007) (plur\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " 9:59 PM.: Identical words used throughout the quotation: \"Supervised by\"", "rollout/best_game/whole_game": "ROLE 0: White House Press Secretary Jay Carney denied on Thursday a television news reporter's day-old claim that reporters often 'provide the questions to him in advance,' before his daily briefings, and that he sometimes provides answers on paper before taking the podium. 
WIthin hours, the Phoenix reporter at the center of a quick-drying controversy admitted she got the whole thing wrong. Phoenix news anchor Catherine Anaya reported Wednesday night on KPHO-TV5 that in an 'off the record' meeting, Carney had told a handful of local TV reporters that White House correspondents often tell him before daily briefings what they'll be asking. 'If only this were true,' Carney told MailOnline Thursday morning. By mid-afternoon Anaya, fresh of a Washington-to-Phoenix flight, walked parts of her story back in an email to MailOnline. But she insisted that she herself was asked to submit a question in advance for Carney on Wednesday afternoon. SCROLL DOWN FOR VIDEO . KPHO-TV anchor Catherine Anaya went to Washington to participate in a carefully managed television journalism cattle-call at the White House, and reported that Press Secretary Jay Carney's daily briefings are often just for show . Carney denied getting a heads-up about daily briefing questions: 'If only this were true' 'As a local journalist I had no issue providing my proposed question in advance,' she told MailOnline, 'because I wanted to make sure it was an appropriate q[uestion] for a national briefing and I wanted to make sure it was appropriate for Mr. Carney.' '[B]ut in discussing it with a staff member the night before, we decided I would save it for the president. I was attempting to not waste national time on a local question, but in my attempt at explaining that I unintentionally made it sound like that experience applied to everyone.' 'That is my mistake,' Anaya added, 'and I own up to it.' Meanwhile, KPHO-TV issued a separate statement attributed to Anaya, but the CBS affiliate station quickly deleted it from its website. Assignment Editor Scott Davis told MailOnline that it 'apparently ... was not the correct statement.' Anaya's on-air commentary remains on the website, however. 
'We started here shortly after 8 o'clock with a coffee with Press Secretary Jay Carney inside his office in the West Wing,' she said on the air, before making a stunning breach of journalistic protocol by reporting on an 'off the record' meeting and airing a photo of it. KPHO-TV . issued this extended quote Thursday afternoon from Catherine Anaya, and . then quickly withdrew it, saying in an email that 'apparently that was . not the correct statement': . 'It seems much had been inferred about my observations following my White House visit yesterday. 'First, . I did not take notes during our coffee with Jay Carney because it was . off the record. But when I referenced the meeting in my live reports I . did say that it was a great opportunity to talk about the challenges of . his day and how he has to be so well-versed on many topics each day. 'In . my live report I also wanted to share my impression of my experience in . getting a question answered during the briefing. I was indeed asked to . provide my question in advance. Because my question was largely of local . interest, I chose to save it for my interview with the President . instead. 'My mistake was to . lump that experience with my coffee meeting reference, inadvertently . giving Mr. Carney credit for that when in fact it did not come from him. I regret giving anyone the impression that it was from conversation I . had with Mr. Carney. I do not attend those briefings regularly and cannot speak directly to the process for non-visiting journalists. None of my observations stemmed from my off-the-record meeting with Jay Carney.' 'And this was off-the-record,' she . reported, 'so we were able to ask him all about some of the preparation . that he does on a regular basis for talking to the press in his daily . press briefings. He showed us a very long list of items that he has to . be well-versed on every single\nROLE 1: Given the text: document that he has to present.' 
On Author's statement: _____________________ The publication places large shares in fact and style in the Washington Post on the subject of federal rules of the daily\u2026and sixty percent nationally. Based on recent eminent jurisprudence searching on Capitol Hill \u2013 except surely George Washington \u2013 what departed in my public way from this work? Brom is Niels Grund v. New York City Circuit Court \u2013 (NELC, N.Y. June 26, 2006), 341 U.S. 1211; Anderson v. Los Angeles CASCADE, 15 Pet. 219, 132 P.2d 980, 800 (2007) (plur\nReconstruct the summarized text to the detailed:\nROLE 2: 9:59 PM.: Identical words used throughout the quotation: \"Supervised by\"", "rollout/best_game/overall_score": -260.2453797570889, "rollout/best_game/accuracy (r2)": 0.0019393939393939393, "_this_batch_num_rl_token": 22155, "num_rl_rollout": 9, "lm_epoch": 0, "rl_epoch": 0, "step": 160, "total_data_token": 200879, "total_rl_token": 695627, "total_lm_token": 0, "total_token": 695627, "completed_steps": 160, "rollout/num_train_sample": 160, "timestamp": 1772076638, "checkpoint_dir_name": null, "done": false, "training_iteration": 12, "trial_id": "284c96d2", "date": "2026-02-26_11-30-38", "time_this_iter_s": 37.656912088394165, "time_total_s": 438.919885635376, "pid": 222837, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, 
"per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", 
"cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 192.08086919784546, "iterations_since_restore": 5} | |
| {"rollout/num_samples": 160, "rollout/avg_q1_length": 730.46875, "rollout/std_q1_length": 181.73641967773438, "rollout/gen/avg_score": -894.6920166015625, "rollout/gen/std_score": 1501.9356689453125, "rollout/gen/avg_r1_length": 131.5, "rollout/gen/std_r1_length": 13.830774307250977, "rollout/gen/avg_r1_score": -225.9990234375, "rollout/gen/std_r1_score": 41.582061767578125, "rollout/gen/avg_r1_accuracy": 0.140152245759964, "rollout/gen/std_r1_accuracy": 0.018491854891180992, "rollout/gen/avg_r2_length": 653.546875, "rollout/gen/std_r2_length": 277.52337646484375, "rollout/gen/avg_r2_score": -1061.865234375, "rollout/gen/std_r2_score": 1637.970703125, "rollout/gen/avg_r2_accuracy": 0.11166971921920776, "rollout/gen/std_r2_accuracy": 0.03779705986380577, "rollout/best_game/query_1": "The Internet got its Christmas wish to #SaveBela. Bela is now being sent to live at a Utah animal sanctuary. The Indiana German shepherd tugged at collective heartstrings last week when it was revealed he could be euthanized due to a stipulation in his late owner's will. But Bela will go to Best Friends Animal Society, the group revealed on its blog Tuesday. \"We are thrilled to announce that Bela has been released to Best Friends Animal Society and we are currently arranging his transport to our sanctuary in Kanab, Utah,\" wrote Francis Battista, vice chair of Best Friends. People across the country learned of 9-year-old Bela's story through media reports last week that emphasized a provision in the will of his owner, Connie Ley, that said the dog should be put to sleep and his ashes placed with Ley's ashes if a suitable home couldn't be found for Bela. The social media hashtag #SaveBela was born after the public expressed outrage at Ley's request to put down a healthy dog. \"Add me to the list of people who would gladly adopt Bela and give her a happy home. Please #SaveBela from her dead owner's stupidity,\" one woman wrote. 
However Doug Denmure, Ley's attorney, said outsiders didn't know the whole story. Before her death on November 25, Ley worried that Bela's aggressive behavior could pose a danger to other people. \"He could cause damage and inflict bodily harm on strangers, in particular, children,\" Denmure told CNN affiliate WCPO-TV in Cincinnati. \"When (Ley) died, she died at her home and the dog was in there. No one could enter the house because they were concerned the dog would attack.\" The dog was housed in a special kennel at PAWS of Dearborn County Humane Center in Lawrenceburg, Indiana, until a decision could be made. Ley's will left the option to send Bela to the Utah animal sanctuary or to put him to sleep up to a close friend. Ley stated that if it was cost prohibitive or otherwise impossible to send the dog to the shelter, he should be put down and laid to rest with her, according to her attorney. \"Denmure and her designated friend, charged to decide what was best for Bela, were unaware of Best Friends experience in providing appropriate care for dogs like Bela,\" Eric Rayvid, a shelter representative, told WCPO after it was revealed that the dog would be saved. Before the decision was reached, Denmure maintained that everything about Ley's request was legal. \"The dog was being owned by my client and now it's part of her estate,\" he told WCPO. \"And those are her wishes, as far as the future of the dog is concerned. Outsiders don't have the grounds to rewrite the provisions of my client's will and impose what they want.\" The case has raised ethical questions about how much control the deceased should have over the lives of their pets. \"On the whole, we don't allow people to euthanize healthy pets,\" said Edward Queen, a professor of ethics at Emory University. On the other hand, he said, it's not uncommon for unwanted pets to be euthanized. \"As a matter of policy, do we really want people to control the life of a healthy pet on a whim?\" he pondered. 
\"Assuming the dog is healthy and will be cared for, I'm not sure we want to encourage this.\" Best Friends Animal Society said in its blog post that it's time for a new hashtag: #BelaIsSafe -- but he isn't out of the woods yet. \"Of all those party to the unfolding events following Connie Ley's passing, Bela has experienced the trauma of losing his life-long companion and his home. Our first priority is to do everything we can to make Bela's life whole once again in keeping with Connie Ley's wishes for her beloved dog.\"\nTL;DR: ", "rollout/best_game/query_2": "Given the text: \u00a0Stop keeping pets away from homes where you live, shame abused dogs on social media but do not make them kill animals -- Erin Kelly Join 40 underpaid animals for information and advice http://bit.ly/1GocBYoC\u00a0 Popular LGBT content available on YouTube every weekend: Evil Donald Trump : Trump's Struggle to Become a 'Mean Man.' Taliban Agents Take Christmas Break From Obama Dads : Negotiations Are Rather Absent . NRF Headquarters Urge Canada's BC voters to volunteer to shelter Nigerians | Altis's Siberian Santa on Still in China's Capital\nReconstruct the summarized text to the detailed:", "rollout/best_game/response_2": " \u00a0The subreddits: posting work, discussions, willows, crambles, Google. Everything is done friend fucking asshole not too shitting garbage i'm starting gaming but identity issues couldn't hurt against ...as for social hunting communism as does Cui bono \u00a0is horror movie is classic 4 other ways the americans have given control. 4 ruled birds and sailor can't both. Darth Vader anything....Reply \u00b7 Report Post", "rollout/best_game/whole_game": "ROLE 0: The Internet got its Christmas wish to #SaveBela. Bela is now being sent to live at a Utah animal sanctuary. The Indiana German shepherd tugged at collective heartstrings last week when it was revealed he could be euthanized due to a stipulation in his late owner's will. 
But Bela will go to Best Friends Animal Society, the group revealed on its blog Tuesday. \"We are thrilled to announce that Bela has been released to Best Friends Animal Society and we are currently arranging his transport to our sanctuary in Kanab, Utah,\" wrote Francis Battista, vice chair of Best Friends. People across the country learned of 9-year-old Bela's story through media reports last week that emphasized a provision in the will of his owner, Connie Ley, that said the dog should be put to sleep and his ashes placed with Ley's ashes if a suitable home couldn't be found for Bela. The social media hashtag #SaveBela was born after the public expressed outrage at Ley's request to put down a healthy dog. \"Add me to the list of people who would gladly adopt Bela and give her a happy home. Please #SaveBela from her dead owner's stupidity,\" one woman wrote. However Doug Denmure, Ley's attorney, said outsiders didn't know the whole story. Before her death on November 25, Ley worried that Bela's aggressive behavior could pose a danger to other people. \"He could cause damage and inflict bodily harm on strangers, in particular, children,\" Denmure told CNN affiliate WCPO-TV in Cincinnati. \"When (Ley) died, she died at her home and the dog was in there. No one could enter the house because they were concerned the dog would attack.\" The dog was housed in a special kennel at PAWS of Dearborn County Humane Center in Lawrenceburg, Indiana, until a decision could be made. Ley's will left the option to send Bela to the Utah animal sanctuary or to put him to sleep up to a close friend. Ley stated that if it was cost prohibitive or otherwise impossible to send the dog to the shelter, he should be put down and laid to rest with her, according to her attorney. 
\"Denmure and her designated friend, charged to decide what was best for Bela, were unaware of Best Friends experience in providing appropriate care for dogs like Bela,\" Eric Rayvid, a shelter representative, told WCPO after it was revealed that the dog would be saved. Before the decision was reached, Denmure maintained that everything about Ley's request was legal. \"The dog was being owned by my client and now it's part of her estate,\" he told WCPO. \"And those are her wishes, as far as the future of the dog is concerned. Outsiders don't have the grounds to rewrite the provisions of my client's will and impose what they want.\" The case has raised ethical questions about how much control the deceased should have over the lives of their pets. \"On the whole, we don't allow people to euthanize healthy pets,\" said Edward Queen, a professor of ethics at Emory University. On the other hand, he said, it's not uncommon for unwanted pets to be euthanized. \"As a matter of policy, do we really want people to control the life of a healthy pet on a whim?\" he pondered. \"Assuming the dog is healthy and will be cared for, I'm not sure we want to encourage this.\" Best Friends Animal Society said in its blog post that it's time for a new hashtag: #BelaIsSafe -- but he isn't out of the woods yet. \"Of all those party to the unfolding events following Connie Ley's passing, Bela has experienced the trauma of losing his life-long companion and his home. Our first priority is to do everything we can to make Bela's life whole once again in keeping with Connie Ley's wishes for her beloved dog.\"\nTL;DR: \nROLE 1: Given the text: \u00a0Stop keeping pets away from homes where you live, shame abused dogs on social media but do not make them kill animals -- Erin Kelly Join 40 underpaid animals for information and advice http://bit.ly/1GocBYoC\u00a0 Popular LGBT content available on YouTube every weekend: Evil Donald Trump : Trump's Struggle to Become a 'Mean Man.' 
Taliban Agents Take Christmas Break From Obama Dads : Negotiations Are Rather Absent . NRF Headquarters Urge Canada's BC voters to volunteer to shelter Nigerians | Altis's Siberian Santa on Still in China's Capital\nReconstruct the summarized text to the detailed:\nROLE 2: \u00a0The subreddits: posting work, discussions, willows, crambles, Google. Everything is done friend fucking asshole not too shitting garbage i'm starting gaming but identity issues couldn't hurt against ...as for social hunting communism as does Cui bono \u00a0is horror movie is classic 4 other ways the americans have given control. 4 ruled birds and sailor can't both. Darth Vader anything....Reply \u00b7 Report Post", "rollout/best_game/overall_score": -209.28753991912282, "rollout/best_game/accuracy (r2)": 0.03734560970132495, "_this_batch_num_rl_token": 23375, "num_rl_rollout": 10, "lm_epoch": 0, "rl_epoch": 0, "step": 180, "total_data_token": 224254, "total_rl_token": 780060, "total_lm_token": 0, "total_token": 780060, "completed_steps": 180, "rollout/num_train_sample": 160, "timestamp": 1772076679, "checkpoint_dir_name": null, "done": false, "training_iteration": 13, "trial_id": "284c96d2", "date": "2026-02-26_11-31-19", "time_this_iter_s": 41.26333284378052, "time_total_s": 480.1832184791565, "pid": 222837, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 
4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", 
"cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 233.34420204162598, "iterations_since_restore": 6} | |
| {"rl_info/A2G": -0.7692683935165405, "rl_info/entropy": 3.033878803253174, "rl_info/total_token": 4079.0, "rl_info/advantage_b4_norm": -838.5155029296875, "rl_info/kl_w_ref": 0.0, "train/rl_loss": 76.9265365600586, "train/total_loss": 76.9265365600586, "cnndm/rouge1": 0.2059061991742314, "cnndm/rouge2": 0.0704826853875024, "cnndm/rougeL": 0.16808779716677089, "cnndm/rougeLsum": 0.17916823101064794, "cnndm/bertscore_precision": 0.7070501804351806, "cnndm/bertscore_recall": 0.7544343829154968, "cnndm/bertscore_f1": 0.7295159935951233, "eval_agg/avg_all_rougef": 0.15591122818478814, "eval_agg/avg_all_bertf": 0.7295159935951233, "eval_agg/avg_all": 0.44271361088995576, "num_rl_rollout": 10, "lm_epoch": 0, "rl_epoch": 0, "step": 200, "total_data_token": 224254, "total_rl_token": 863607, "total_lm_token": 0, "total_token": 863607, "completed_steps": 200, "tune_objective": 0.529250568348017, "timestamp": 1772076694, "checkpoint_dir_name": "checkpoint_000001", "should_checkpoint": true, "done": true, "training_iteration": 14, "trial_id": "284c96d2", "date": "2026-02-26_11-31-34", "time_this_iter_s": 15.110726118087769, "time_total_s": 495.29394459724426, "pid": 222837, "hostname": "candle", "node_ip": "10.2.1.32", "config": {"train_loop_config": {"dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "sim_with_one_golden__cnndm_train", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 
3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "off_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 0.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": false, "sample_config": {"do_sample": true, "min_new_tokens": 1, "temperature": 1.0}, "inference_config": {"do_sample": true, "temperature": 0.0, "min_new_tokens": 32, "max_new_tokens": 135}, "rollout_config": {"accuracy_w": 1.96282248134428, "len_pen": 1.0, "accuracy_w2": 19.549524484259873, "len_pen2": 1.0, "threshold": 0.01605191133358762, "similarity_fn": "rouge", "max_ctx_len": 860, "sampling_params_1": {"n": 1, "min_tokens": 32, "max_tokens": 135, "temperature": 1.0, "logprobs": 5}, "sampling_params_2": {"n": 4, "min_tokens": 5, "max_tokens": 860, "temperature": 1.0, "logprobs": 5}}, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": false, "vllm_sleep": true, "lora": true, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "buffer_max_size": 20000, "trunc_evals": ["cnndm___10"], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": ["cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25"], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", 
"script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1}}, "time_since_restore": 248.45492815971375, "iterations_since_restore": 7} | |