Xin-Rui commited on
Commit
7155cf2
·
verified ·
1 Parent(s): 3fa857a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. .gitignore +2 -0
  3. LLaMA-Factory/examples/deepseed_train.sh +43 -0
  4. Preparation/add_special_tokens.py +51 -0
  5. README.md +461 -0
  6. easyr1/Dockerfile +68 -0
  7. easyr1/Dockerfile.nightly +62 -0
  8. easyr1/cut_dataset.py +47 -0
  9. easyr1/datasets/math500_RL.parquet +3 -0
  10. easyr1/datasets/train_RL.parquet +3 -0
  11. easyr1/delete_checkpoints.py +59 -0
  12. easyr1/examples/8ratio_v1.sh +15 -0
  13. easyr1/examples/8ratio_v1.yaml +88 -0
  14. easyr1/examples/baselines/qwen2_5_vl_3b_clevr.sh +19 -0
  15. easyr1/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh +19 -0
  16. easyr1/examples/format_prompt/math_format.jinja +1 -0
  17. easyr1/examples/format_prompt/r1v_format.jinja +1 -0
  18. easyr1/examples/reward_function/math.py +46 -0
  19. easyr1/examples/reward_function/r1v.py +47 -0
  20. easyr1/pyproject.toml +39 -0
  21. easyr1/requirements.txt +20 -0
  22. easyr1/scripts/model_merger.py +164 -0
  23. easyr1/setup.py +61 -0
  24. easyr1/verl/__init__.py +15 -0
  25. easyr1/verl/__pycache__/__init__.cpython-311.pyc +0 -0
  26. easyr1/verl/__pycache__/protocol.cpython-311.pyc +0 -0
  27. easyr1/verl/models/__init__.py +13 -0
  28. easyr1/verl/models/__pycache__/__init__.cpython-311.pyc +0 -0
  29. easyr1/verl/models/__pycache__/monkey_patch.cpython-311.pyc +0 -0
  30. easyr1/verl/models/monkey_patch.py +32 -0
  31. easyr1/verl/models/transformers/__init__.py +13 -0
  32. easyr1/verl/models/transformers/__pycache__/__init__.cpython-311.pyc +0 -0
  33. easyr1/verl/models/transformers/__pycache__/flash_attention_utils.cpython-311.pyc +0 -0
  34. easyr1/verl/models/transformers/__pycache__/qwen2_vl.cpython-311.pyc +0 -0
  35. easyr1/verl/models/transformers/flash_attention_utils.py +191 -0
  36. easyr1/verl/models/transformers/qwen2_vl.py +189 -0
  37. easyr1/verl/protocol.py +705 -0
  38. easyr1/verl/single_controller/__init__.py +13 -0
  39. easyr1/verl/single_controller/__pycache__/__init__.cpython-311.pyc +0 -0
  40. easyr1/verl/single_controller/base/__init__.py +19 -0
  41. easyr1/verl/single_controller/base/__pycache__/__init__.cpython-311.pyc +0 -0
  42. easyr1/verl/single_controller/base/__pycache__/decorator.cpython-311.pyc +0 -0
  43. easyr1/verl/single_controller/base/__pycache__/worker.cpython-311.pyc +0 -0
  44. easyr1/verl/single_controller/base/__pycache__/worker_group.cpython-311.pyc +0 -0
  45. easyr1/verl/single_controller/base/decorator.py +213 -0
  46. easyr1/verl/single_controller/base/register_center/__init__.py +13 -0
  47. easyr1/verl/single_controller/base/register_center/__pycache__/__init__.cpython-311.pyc +0 -0
  48. easyr1/verl/single_controller/base/register_center/__pycache__/ray.cpython-311.pyc +0 -0
  49. easyr1/verl/single_controller/base/register_center/ray.py +28 -0
  50. easyr1/verl/single_controller/base/worker.py +202 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ evaluation/data/tabmwp/test.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ evaluation/latex2sympy/antlr-4.11.1-complete.jar filter=lfs diff=lfs merge=lfs -text
38
+ evaluation/latex2sympy/gen/__pycache__/PSLexer.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
39
+ evaluation/latex2sympy/gen/__pycache__/PSParser.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Dataset-BudgetThinker/
2
+ upload.py
LLaMA-Factory/examples/deepseed_train.sh ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export special_token_loss=T
2
+
3
+
4
+ deepspeed --num_gpus 8 src/train.py \
5
+ --deepspeed examples/deepspeed/ds_z0_config.json \
6
+ --stage sft \
7
+ --model_name_or_path /path/to/your/model \
8
+ --do_train \
9
+ --dataset 8ratio_SFT_below10000 \
10
+ --template deepseek3 \
11
+ --finetuning_type full \
12
+ --output_dir /path/to/your/output_1 \
13
+ --overwrite_cache \
14
+ --per_device_train_batch_size 2 \
15
+ --gradient_accumulation_steps 8 \
16
+ --lr_scheduler_type cosine \
17
+ --logging_steps 10 \
18
+ --save_steps 2000 \
19
+ --learning_rate 2e-5 \
20
+ --num_train_epochs 2.0 \
21
+ --plot_loss \
22
+ --bf16
23
+
24
+
25
+ deepspeed --num_gpus 8 src/train.py \
26
+ --deepspeed examples/deepspeed/ds_z0_config.json \
27
+ --stage sft \
28
+ --model_name_or_path /path/to/your/output_1 \
29
+ --do_train \
30
+ --dataset 8ratio_SFT_below10000 \
31
+ --template deepseek3 \
32
+ --finetuning_type full \
33
+ --output_dir /path/to/your/output_2 \
34
+ --overwrite_cache \
35
+ --per_device_train_batch_size 2 \
36
+ --gradient_accumulation_steps 8 \
37
+ --lr_scheduler_type cosine \
38
+ --logging_steps 10 \
39
+ --save_steps 2000 \
40
+ --learning_rate 2e-5 \
41
+ --num_train_epochs 4.0 \
42
+ --plot_loss \
43
+ --bf16
Preparation/add_special_tokens.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer
2
+ from transformers import AutoModelForCausalLM
3
+ import json
4
+ # model = AutoModelForCausalLM.from_pretrained("/data/sunyi/hf_cache/hub/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-7B/snapshots/6602cadec947dbb53e64f3d8d6425320b2197247")
5
+ # tokenizer = AutoTokenizer.from_pretrained("/data/sunyi/hf_cache/hub/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-7B/snapshots/6602cadec947dbb53e64f3d8d6425320b2197247")
6
+
7
+
8
+
9
+
10
+ def gen_special_tokens_json():
11
+ special_tokens_list = {}
12
+ for i in range(7):
13
+ special_tokens_list[f"{i}"] = f"\n<remaining>{i+1}/8</remaining>\n"
14
+ print(special_tokens_list)
15
+
16
+ with open('./special_tokens.json', 'w') as f:
17
+ json.dump(special_tokens_list, f)
18
+ print('special_tokens.json has been generated.')
19
+
20
+ if __name__ == "__main__":
21
+
22
+ ori_model_path = '/path/to/your/ori/model'
23
+ new_model_path = '/path/to/your/new/model'
24
+
25
+ model = AutoModelForCausalLM.from_pretrained(ori_model_path)
26
+ tokenizer = AutoTokenizer.from_pretrained(ori_model_path)
27
+ print(model.get_input_embeddings())
28
+ print(model.lm_head)
29
+ print(len(tokenizer))
30
+
31
+ gen_special_tokens_json()
32
+ with open('./special_tokens.json') as f:
33
+ special_tokens = json.load(f)
34
+
35
+ bins_tokens = [
36
+ special_tokens[f"{i}"] for i in range(7)
37
+ ]
38
+
39
+ tokenizer.add_special_tokens({'additional_special_tokens': bins_tokens})
40
+ model.resize_token_embeddings(len(tokenizer))
41
+ print('Vocab size after adding special tokens:', len(tokenizer))
42
+
43
+
44
+
45
+ tokenizer.save_pretrained(new_model_path)
46
+ model.save_pretrained(new_model_path)
47
+ model = AutoModelForCausalLM.from_pretrained(new_model_path)
48
+ tokenizer = AutoTokenizer.from_pretrained(new_model_path)
49
+ print(model.get_input_embeddings())
50
+ print(model.lm_head)
51
+ print(len(tokenizer))
README.md ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BudgetThinker: Empowering Budget-aware LLM Reasoning with Control Tokens 🚀
2
+
3
+ ## Table of Contents
4
+
5
+ - [About](#About) 📝
6
+ - [Install](#Install) ⚙️
7
+ - [Preparation](#preparation) 📚
8
+ - [Training](#training) 🏋️‍♂️
9
+ - [Evaluation](#evaluation) 📊
10
+
11
+ ## About
12
+ This repository contains the code implementation for the paper:
13
+
14
+ [BudgetThinker: Empowering Budget-aware LLM Reasoning with Control Tokens](https://www.arxiv.org/abs/2508.17196 ) 🚀
15
+
16
+ Our training data can be downloaded from the following links:
17
+
18
+ [Dataset-BudgetThinker](https://huggingface.co/datasets/Xin-Rui/Dataset-BudgetThinker/tree/main ) 📥
19
+
20
+ The trained model (based on DeepSeek-R1-Distill-Qwen-1.5B) can be obtained from the following link:
21
+
22
+ [BudgetThinker-1.5b](https://huggingface.co/Xin-Rui/BudgetThinker-1.5b/tree/main ) 📦
23
+
24
+ ## Install
25
+
26
+ ### Clone This Repo 📋
27
+
28
+ ### SFT-Stage: LLaMA-Factory
29
+
30
+ ```bash
31
+ git clone git@github.com:hiyouga/LLaMA-Factory.git
32
+ ```
33
+
34
+ After cloning the repository, follow the instructions in the [Installation Guide](https://llamafactory.readthedocs.io/zh-cn/latest/getting_started/installation.html ) to configure the necessary dependencies. 🔧
35
+
36
+ ### Modify Environments' Code 🛠️
37
+
38
+ You need to modify a piece of code in the transformers library within the environment corresponding to the LLaMA-Factory project. Locate the source code of the transformers library in your environment and replace the loss/loss_utils.py file. For example, using my path:
39
+
40
+ ```bash
41
+ /home/user/anaconda3/envs/llama-fac/lib/python3.11/site-packages/transformers/loss/loss_utils.py
42
+
43
+ ↕️
44
+
45
+ to_replace/transformers/loss/loss_utils.py
46
+ ```
47
+
48
+ > Note: The version of the transformers library corresponding to this code is 4.46.1.
49
+
50
+ The modified code will allow you to adjust the loss weights for special tokens during training by modifying environment variables. The specific instructions are as follows:
51
+
52
+ ```bash
53
+ export special_token_loss=F # Set to F to disable loss calculation for special tokens (weight = 0)
54
+ export special_token_loss=T # Set to T to enable loss calculation for special tokens (default weight = 1)
55
+ export special_token_loss=Tn # Set the loss weight for special tokens, where n is a float representing the specified weight value
56
+ # For example: export special_token_loss=T10, which sets the loss weight for special tokens to 10
57
+ ```
58
+
59
+ ### RL-Stage: EasyR1 🎯
60
+
61
+ The modified project code is included in the `./easyr1` directory. For environment configuration, please refer to the [EasyR1](https://github.com/hiyouga/EasyR1 ) documentation.
62
+
63
+ ### Eval-Stage: Qwen2.5-Math 📈
64
+
65
+ The modified project code is included in the `./evaluation` directory. For environment configuration, please refer to the [Qwen2.5-Math](https://github.com/QwenLM/Qwen2.5-Math ) documentation.
66
+
67
+ ### Modify Environments' Code 🛠️
68
+
69
+ It is necessary to modify the code in the environments corresponding to the `./easyr1` and `./evaluation` directories. We need to modify the source code of vllm to support the insertion of special tokens during inference:
70
+
71
+ #### Method 1: Direct Replacement (Limited to vllm Version 0.7.3) 🔁
72
+ Locate the `worker/model_runner.py` file in the vllm library and replace it:
73
+
74
+ ```bash
75
+ /home/user/anaconda3/envs/easyr1/lib/python3.11/site-packages/vllm/worker/model_runner.py
76
+ &
77
+ /home/user/anaconda3/envs/QMath/lib/python3.11/site-packages/vllm/worker/model_runner.py
78
+
79
+ ↕️
80
+
81
+ to_replace/vllm/worker/model_runner.py
82
+ ```
83
+
84
+ > Note: The version of the vllm library corresponding to this code is 0.7.3.
85
+
86
+ #### Method 2: Direct Modification 📝
87
+
88
+ Focus on the execute_model function in the `...vllm/worker/model_runner.py` file. The original version is as follows:
89
+
90
+ ```python
91
+
92
+ @torch.inference_mode()
93
+ def execute_model(
94
+ self,
95
+ model_input: ModelInputForGPUWithSamplingMetadata,
96
+ kv_caches: List[torch.Tensor],
97
+ intermediate_tensors: Optional[IntermediateTensors] = None,
98
+ num_steps: int = 1,
99
+ ) -> Optional[Union[List[SamplerOutput], IntermediateTensors]]:
100
+ if num_steps > 1:
101
+ raise ValueError("num_steps > 1 is not supported in ModelRunner")
102
+
103
+ ... more code ...
104
+ ... more code ...
105
+
106
+ # Compute the logits in the last pipeline stage.
107
+ if not get_pp_group().is_last_rank:
108
+ return hidden_or_intermediate_states
109
+
110
+ logits = self.model.compute_logits(hidden_or_intermediate_states,
111
+ model_input.sampling_metadata)
112
+
113
+ if not self.is_driver_worker:
114
+ return []
115
+
116
+ # Sample the next token.
117
+ output: SamplerOutput = self.model.sample(
118
+ logits=logits,
119
+ sampling_metadata=model_input.sampling_metadata,
120
+ )
121
+
122
+
123
+
124
+
125
+ if self.return_hidden_states:
126
+ # we only need to pass hidden states of most recent token
127
+ assert model_input.sampling_metadata is not None
128
+ indices = model_input.sampling_metadata.selected_token_indices
129
+ if model_input.is_prompt:
130
+ hidden_states = hidden_or_intermediate_states.index_select(
131
+ 0, indices)
132
+ elif decode_meta.use_cuda_graph:
133
+ hidden_states = hidden_or_intermediate_states[:len(indices)]
134
+ else:
135
+ hidden_states = hidden_or_intermediate_states
136
+
137
+ output.hidden_states = hidden_states
138
+
139
+ return [output]
140
+ ```
141
+
142
+ Modify the code as follows:
143
+
144
+ ```python
145
+
146
+ @torch.inference_mode()
147
+ def execute_model(
148
+ self,
149
+ model_input: ModelInputForGPUWithSamplingMetadata,
150
+ kv_caches: List[torch.Tensor],
151
+ intermediate_tensors: Optional[IntermediateTensors] = None,
152
+ num_steps: int = 1,
153
+ ) -> Optional[Union[List[SamplerOutput], IntermediateTensors]]:
154
+ if num_steps > 1:
155
+ raise ValueError("num_steps > 1 is not supported in ModelRunner")
156
+
157
+ ... more code ...
158
+ ... more code ...
159
+
160
+ # Compute the logits in the last pipeline stage.
161
+ if not get_pp_group().is_last_rank:
162
+ return hidden_or_intermediate_states
163
+
164
+ logits = self.model.compute_logits(hidden_or_intermediate_states,
165
+ model_input.sampling_metadata)
166
+
167
+ if not self.is_driver_worker:
168
+ return []
169
+
170
+ # Sample the next token.
171
+ output: SamplerOutput = self.model.sample(
172
+ logits=logits,
173
+ sampling_metadata=model_input.sampling_metadata,
174
+ )
175
+
176
+ #! >>>>>>>>>>> add remaining tokens to output <<<<<<<<<<<<
177
+ import os
178
+ if os.getenv("remaining", "remaing") == "remaing":
179
+ special_tokens = [151665+i for i in range(400)]
180
+ for seq_id in range(len(model_input.sampling_metadata.seq_groups)):
181
+ prompt_token_ids = next(iter(model_input.sampling_metadata.seq_groups[seq_id].seq_data.values())).prompt_token_ids
182
+ output_token_ids_till_now = next(iter(model_input.sampling_metadata.seq_groups[seq_id].seq_data.values())).output_token_ids
183
+ # reversely iterate outputtoken_ids_till_now, which is a tuple, to find the last special token
184
+ last_special_token_idx, last_special_token = None, None
185
+ for idx in range(len(output_token_ids_till_now)-1, -1, -1):
186
+ token_id = output_token_ids_till_now[idx]
187
+ if token_id in special_tokens:
188
+ last_special_token_idx = idx
189
+ last_special_token = token_id
190
+ break
191
+ if last_special_token == 151665: # has reached the last special token of <remaining 50>
192
+ continue
193
+ if last_special_token_idx is not None:
194
+ distance_to_last_special_token = len(output_token_ids_till_now) - last_special_token_idx - 1
195
+ if distance_to_last_special_token == 50:
196
+ output.outputs[seq_id].samples[0].output_token = last_special_token - 1
197
+ former_key = list(output.outputs[seq_id].samples[0].logprobs.keys())[0]
198
+ output.outputs[seq_id].samples[0].logprobs[last_special_token - 1] = list(output.outputs[seq_id].samples[0].logprobs.values())[0]
199
+ # delete former key-value pair
200
+
201
+ #g
202
+ # print(f"former_key = {former_key}")
203
+ # print(f"last_special_token - 1 = {last_special_token - 1}")
204
+ if former_key == last_special_token -1:
205
+ print("&"*50 + f"former_key == last_special_token -1 == {former_key}" + "!"*50)
206
+ else:
207
+ del output.outputs[seq_id].samples[0].logprobs[former_key]
208
+ #g
209
+
210
+ # del output.outputs[seq_id].samples[0].logprobs[former_key]
211
+ else: # there has not been any special token in the output
212
+ last_special_token = None
213
+ for prompt_token_id in prompt_token_ids:
214
+ if prompt_token_id in special_tokens:
215
+ last_special_token = prompt_token_id
216
+ break
217
+ if last_special_token is not None:
218
+ if len(output_token_ids_till_now) == 50:
219
+ output.outputs[seq_id].samples[0].output_token = last_special_token - 1
220
+ former_key = list(output.outputs[seq_id].samples[0].logprobs.keys())[0]
221
+ output.outputs[seq_id].samples[0].logprobs[last_special_token - 1] = list(output.outputs[seq_id].samples[0].logprobs.values())[0]
222
+ #g
223
+ # print(f"former_key = {former_key}")
224
+ # print(f"last_special_token - 1 = {last_special_token - 1}")
225
+ if former_key == last_special_token -1:
226
+ print("#"*50 + f"former_key == last_special_token -1 == {former_key}" + "!"*50)
227
+ else:
228
+ del output.outputs[seq_id].samples[0].logprobs[former_key]
229
+ #g
230
+ # del output.outputs[seq_id].samples[0].logprobs[former_key]
231
+
232
+ elif "ratio" in os.getenv("remaining", "remaing"):
233
+ N = int(os.getenv("remaining", "remaing").replace("ratio", ""))
234
+ assert os.getenv("budget") is not None
235
+ budget = int(os.environ["budget"])
236
+ delta = budget // N + 1
237
+
238
+ special_tokens = [151665+i for i in range(N-1)]
239
+ for seq_id in range(len(model_input.sampling_metadata.seq_groups)):
240
+ prompt_token_ids = next(iter(model_input.sampling_metadata.seq_groups[seq_id].seq_data.values())).prompt_token_ids
241
+ output_token_ids_till_now = next(iter(model_input.sampling_metadata.seq_groups[seq_id].seq_data.values())).output_token_ids
242
+ # reversely iterate outputtoken_ids_till_now, which is a tuple, to find the last special token
243
+ last_special_token_idx, last_special_token = None, None
244
+ for idx in range(len(output_token_ids_till_now)-1, -1, -1):
245
+ token_id = output_token_ids_till_now[idx]
246
+ if token_id in special_tokens:
247
+ last_special_token_idx = idx
248
+ last_special_token = token_id
249
+ break
250
+ if last_special_token == 151665: # has reached the last special token of <remaining 50>
251
+ continue
252
+ if last_special_token_idx is not None:
253
+ distance_to_last_special_token = len(output_token_ids_till_now) - last_special_token_idx - 1
254
+ if distance_to_last_special_token == delta:
255
+ output.outputs[seq_id].samples[0].output_token = last_special_token - 1
256
+ former_key = list(output.outputs[seq_id].samples[0].logprobs.keys())[0]
257
+ output.outputs[seq_id].samples[0].logprobs[last_special_token - 1] = list(output.outputs[seq_id].samples[0].logprobs.values())[0]
258
+ # delete former key-value pair
259
+
260
+ #g
261
+ # print(f"former_key = {former_key}")
262
+ # print(f"last_special_token - 1 = {last_special_token - 1}")
263
+ if former_key == last_special_token -1:
264
+ print("&"*50 + f"former_key == last_special_token -1 == {former_key}" + "!"*50)
265
+ else:
266
+ del output.outputs[seq_id].samples[0].logprobs[former_key]
267
+ #g
268
+
269
+ # del output.outputs[seq_id].samples[0].logprobs[former_key]
270
+ else: # there has not been any special token in the output
271
+ last_special_token = 151671 + 1 #g 手动设置成7/8 + 1的token,否则全是从6/8开始输出。
272
+ if last_special_token is not None:
273
+ if len(output_token_ids_till_now) == delta:
274
+ output.outputs[seq_id].samples[0].output_token = last_special_token - 1
275
+ former_key = list(output.outputs[seq_id].samples[0].logprobs.keys())[0]
276
+ output.outputs[seq_id].samples[0].logprobs[last_special_token - 1] = list(output.outputs[seq_id].samples[0].logprobs.values())[0]
277
+ #g
278
+ # print(f"former_key = {former_key}")
279
+ # print(f"last_special_token - 1 = {last_special_token - 1}")
280
+ if former_key == last_special_token -1:
281
+ print("#"*50 + f"former_key == last_special_token -1 == {former_key}" + "!"*50)
282
+ else:
283
+ del output.outputs[seq_id].samples[0].logprobs[former_key]
284
+ #g
285
+ # del output.outputs[seq_id].samples[0].logprobs[former_key]
286
+
287
+
288
+ elif os.getenv("remaining", "remaing") == "remaining250":
289
+ special_tokens = [151665+i for i in range(40)]
290
+ for seq_id in range(len(model_input.sampling_metadata.seq_groups)):
291
+ prompt_token_ids = next(iter(model_input.sampling_metadata.seq_groups[seq_id].seq_data.values())).prompt_token_ids
292
+ output_token_ids_till_now = next(iter(model_input.sampling_metadata.seq_groups[seq_id].seq_data.values())).output_token_ids
293
+ # reversely iterate outputtoken_ids_till_now, which is a tuple, to find the last special token
294
+ last_special_token_idx, last_special_token = None, None
295
+ for idx in range(len(output_token_ids_till_now)-1, -1, -1):
296
+ token_id = output_token_ids_till_now[idx]
297
+ if token_id in special_tokens:
298
+ last_special_token_idx = idx
299
+ last_special_token = token_id
300
+ break
301
+ if last_special_token == 151665: # has reached the last special token of <remaining 50>
302
+ continue
303
+ if last_special_token_idx is not None:
304
+ distance_to_last_special_token = len(output_token_ids_till_now) - last_special_token_idx - 1
305
+ if distance_to_last_special_token == 250:
306
+ output.outputs[seq_id].samples[0].output_token = last_special_token - 1
307
+ former_key = list(output.outputs[seq_id].samples[0].logprobs.keys())[0]
308
+ output.outputs[seq_id].samples[0].logprobs[last_special_token - 1] = list(output.outputs[seq_id].samples[0].logprobs.values())[0]
309
+ # delete former key-value pair
310
+
311
+ #g
312
+ # print(f"former_key = {former_key}")
313
+ # print(f"last_special_token - 1 = {last_special_token - 1}")
314
+ if former_key == last_special_token -1:
315
+ print("&"*50 + f"former_key == last_special_token -1 == {former_key}" + "!"*50)
316
+ else:
317
+ del output.outputs[seq_id].samples[0].logprobs[former_key]
318
+ #g
319
+
320
+ # del output.outputs[seq_id].samples[0].logprobs[former_key]
321
+ else: # there has not been any special token in the output
322
+ last_special_token = None
323
+ for prompt_token_id in prompt_token_ids:
324
+ if prompt_token_id in special_tokens:
325
+ last_special_token = prompt_token_id
326
+ break
327
+ if last_special_token is not None:
328
+ if len(output_token_ids_till_now) == 250:
329
+ output.outputs[seq_id].samples[0].output_token = last_special_token - 1
330
+ former_key = list(output.outputs[seq_id].samples[0].logprobs.keys())[0]
331
+ output.outputs[seq_id].samples[0].logprobs[last_special_token - 1] = list(output.outputs[seq_id].samples[0].logprobs.values())[0]
332
+ #g
333
+ # print(f"former_key = {former_key}")
334
+ # print(f"last_special_token - 1 = {last_special_token - 1}")
335
+ if former_key == last_special_token -1:
336
+ print("#"*50 + f"former_key == last_special_token -1 == {former_key}" + "!"*50)
337
+ else:
338
+ del output.outputs[seq_id].samples[0].logprobs[former_key]
339
+ #g
340
+ # del output.outputs[seq_id].samples[0].logprobs[former_key]
341
+
342
+ else:
343
+ pass
344
+ #! >>>>>>>>>>> add remaining tokens to output <<<<<<<<<<<<
345
+
346
+
347
+ if self.return_hidden_states:
348
+ # we only need to pass hidden states of most recent token
349
+ assert model_input.sampling_metadata is not None
350
+ indices = model_input.sampling_metadata.selected_token_indices
351
+ if model_input.is_prompt:
352
+ hidden_states = hidden_or_intermediate_states.index_select(
353
+ 0, indices)
354
+ elif decode_meta.use_cuda_graph:
355
+ hidden_states = hidden_or_intermediate_states[:len(indices)]
356
+ else:
357
+ hidden_states = hidden_or_intermediate_states
358
+
359
+ output.hidden_states = hidden_states
360
+
361
+ return [output]
362
+ ```
363
+
364
+
365
+ ## Preparation 📖
366
+
367
+ ### Model Preparation 🛠️
368
+
369
+ ```bash
370
+ cd ./Preparation
371
+ ```
372
+
373
+ Modify the `ori_model_path` and `new_model_path` variables in `Preparation/add_special_tokens.py` to embed special tokens into the new model.
374
+
375
+ ```python
376
+ ori_model_path = '/path/to/your/ori/model'
377
+ new_model_path = '/path/to/your/new/model'
378
+ ```
379
+
380
+ ### Data Preparation 📥
381
+
382
+ Our training data can be downloaded from the following links:
383
+
384
+ [Dataset-BudgetThinker](https://huggingface.co/datasets/Xin-Rui/Dataset-BudgetThinker/tree/main )
385
+
386
+ After downloading the SFT-Data, register it in the `dataset_info.json` file of LLaMA-Factory with the registration name `8ratio_SFT_below10000`.
387
+
388
+ #### Data Format
389
+
390
+ **NOTICE!** ⚠️
391
+
392
+ The data format must remain the same during the SFT and RL stages.
393
+
394
+ The data must strictly follow the format shown in the example below (pay special attention to the prompt format in 'prompt'; it must match exactly):
395
+ ```json
396
+ "prompt":"Return your final response within \\boxed{}.
397
+ xxxxxx
398
+ \n(Complete thinking within 1600 tokens or fewer, 7 special tokens ( \n<remaining>7/8</remaining>\n , \n<remaining>6/8</remaining>\n , \n<remaining>5/8</remaining>\n , \n<remaining>4/8</remaining>\n , \n<remaining>3/8</remaining>\n , \n<remaining>2/8</remaining>\n , \n<remaining>1/8</remaining>\n ) will split the thinking process into 8 parts.)"
399
+
400
+ "answer":"<think>
401
+ xxxxx
402
+ </think>\n**Final Answer**\\boxed{}"
403
+ ```
404
+
405
+ The data format is the same as the one used in the paper. For more details, please refer to the paper.
406
+
407
+ ## Training 🏋️‍♂️
408
+
409
+ ### SFT Training
410
+
411
+ ```bash
412
+ cd ./LLaMA-Factory
413
+ ```
414
+
415
+ Use DeepSpeed to accelerate the training process.
416
+ For detailed scripts, refer to `LLaMA-Factory/examples/deepseed_train.sh`.
417
+
418
+ ### RL Training
419
+
420
+ ```bash
421
+ cd ./easyr1
422
+ ```
423
+
424
+ After configuring the `model_path` parameter in the `easyr1/examples/8ratio_v1.sh` and `easyr1/examples/8ratio_v1.yaml` files, you can run the following command:
425
+
426
+ ```bash
427
+ bash /mnt/lyc/wuxinrui/BudgetThinker/easyr1/examples/8ratio_v1.sh
428
+ ```
429
+
430
+ #### Parameter Introduction
431
+
432
+ The script involves three environment variables: stage, steady, and remaining.
433
+ - stage: 1 or 2, indicating whether 1-stage or 2-stage inference is used during training.
434
+
435
+ Stage 1 represents normal output of the chain of thought.
436
+
437
+ Stage 2 represents manually interrupting the output when the chain of thought reaches the budget, and manually inserting `</think>\n**Final Answer**` as the ending prompt at the current position, followed by another output.
438
+
439
+ - steady: Represents the name of the current training session. For example, with "8ratio_v1", it is best to modify all occurrences of this string in both the .sh and .yaml files. This will affect the output location of checkpoints, the output location of logs, and the budget settings under the current training configuration. For more details, refer to `easyr1/verl/utils/dataset.py`.
440
+
441
+ - remaining: The vllm inference mode. Setting it to 8ratio uses the default method (splitting the chain of thought into 8 parts). If set to default, vllm will perform normal inference without adding any special tokens.
442
+
443
+ ## Evaluation 📊
444
+
445
+ First, modify the `MODEL_NAME_OR_PATH` parameter in the `evaluation/remaining_eval/Eval.sh` script, and then run the following command:
446
+
447
+ ```bash
448
+ cd ./evaluation
449
+
450
+ bash evaluation/remaining_eval/Eval.sh
451
+ ```
452
+
453
+ ### Parameter Introduction
454
+
455
+ The following parameters/environment variables need to be set in the script:
456
+
457
+ - remaining/stage: Same as described above.
458
+
459
+ - tip: The template for the prompt before the question. If using the 8ratio inference mode, the tip must also be set to 8ratio. Additionally, tip can be set to prompt_v1 or prompt_v2, which are two different natural language prompts.
460
+
461
+ - MODEL_NAME_OR_PATH: The path to the model. It is recommended to use a recognizable model name as the second-to-last folder name in the path, as the code will read this name as the current evaluation model and store logs in the corresponding folder. For example: `/path1/path2/Model_Name/models`
easyr1/Dockerfile ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
2
+ # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
3
+ FROM nvcr.io/nvidia/pytorch:24.08-py3
4
+
5
+ # Define environments
6
+ ENV MAX_JOBS=32
7
+ ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
8
+ ENV DEBIAN_FRONTEND=noninteractive
9
+ ENV NODE_OPTIONS=""
10
+ ENV HF_HUB_ENABLE_HF_TRANSFER="1"
11
+
12
+ # Define installation arguments
13
+ ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
14
+ ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
15
+ ARG VLLM_COMMIT=227578480d71fc94ef46ca77fb69496412158d68
16
+
17
+ # Set apt source
18
+ RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
19
+ { \
20
+ echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
21
+ echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
22
+ echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
23
+ echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
24
+ } > /etc/apt/sources.list
25
+
26
+ # Install systemctl
27
+ RUN apt-get update && \
28
+ apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
29
+ apt-get clean
30
+
31
+ # Install tini
32
+ RUN apt-get update && \
33
+ apt-get install -y tini && \
34
+ apt-get clean
35
+
36
+ # Change pip source
37
+ RUN pip config set global.index-url "${PIP_INDEX}" && \
38
+ pip config set global.extra-index-url "${PIP_INDEX}" && \
39
+ python -m pip install --upgrade pip
40
+
41
+ # Uninstall nv-pytorch fork
42
+ RUN pip uninstall -y torch torchvision torchaudio \
43
+ pytorch-quantization pytorch-triton torch-tensorrt \
44
+ xgboost transformer_engine flash_attn apex megatron-core
45
+
46
+ # Install vllm-0.7.4-nightly
47
+ RUN pip install --no-cache-dir vllm --pre --extra-index-url "https://wheels.vllm.ai/${VLLM_COMMIT}" && \
48
+ git clone -b verl_v1 https://github.com/hiyouga/vllm.git && \
49
+ cp -r vllm/vllm/ /usr/local/lib/python3.10/dist-packages/
50
+
51
+ # Install torch-2.5.1
52
+ RUN pip install --no-cache-dir torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 tensordict torchdata \
53
+ transformers>=4.49.0 accelerate datasets peft hf-transfer \
54
+ ray[default] codetiming hydra-core pandas pyarrow>=15.0.0 pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
55
+ pytest yapf py-spy pyext pre-commit ruff
56
+
57
+ # Install flash_attn-2.7.4.post1
58
+ RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
59
+ pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
60
+
61
+ # Fix cv2
62
+ RUN pip uninstall -y pynvml nvidia-ml-py && \
63
+ pip install --no-cache-dir nvidia-ml-py>=12.560.30 opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
64
+ pip install --no-cache-dir --upgrade optree>=0.13.0
65
+
66
+ # Reset pip config
67
+ RUN pip config unset global.index-url && \
68
+ pip config unset global.extra-index-url
easyr1/Dockerfile.nightly ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Start from the NVIDIA official image (ubuntu-22.04 + python-3.10)
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
FROM nvcr.io/nvidia/pytorch:24.08-py3

# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    { \
    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
    } > /etc/apt/sources.list

# Install systemctl
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean

# Install tini
RUN apt-get update && \
    apt-get install -y tini && \
    apt-get clean

# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --upgrade pip

# Uninstall nv-pytorch fork
RUN pip uninstall -y torch torchvision torchaudio \
    pytorch-quantization pytorch-triton torch-tensorrt \
    xgboost transformer_engine flash_attn apex megatron-core

# Install torch-2.6.0 + vllm-0.8.2
# FIX: requirement specifiers containing ">=" or "[]" must be quoted; unquoted,
# the shell treats ">" as output redirection (e.g. `transformers>=4.49.0`
# installs an UNPINNED transformers and creates a junk file named "=4.49.0").
RUN pip install --no-cache-dir vllm==0.8.2 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tensordict torchdata \
    "transformers>=4.49.0" accelerate datasets peft hf-transfer \
    "ray[default]" codetiming hydra-core pandas "pyarrow>=15.0.0" pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
    pytest yapf py-spy pyext pre-commit ruff

# Install flash_attn-2.7.4.post1 (prebuilt wheel matching torch 2.6 / cu12 / cp310)
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

# Fix cv2 (quote ">=" specifiers here too, same redirection pitfall as above)
RUN pip uninstall -y pynvml nvidia-ml-py && \
    pip install --no-cache-dir "nvidia-ml-py>=12.560.30" opencv-python-headless==4.8.0.74 fastapi==0.115.6 && \
    pip install --no-cache-dir --upgrade "optree>=0.13.0"

# Reset pip config
RUN pip config unset global.index-url && \
    pip config unset global.extra-index-url
easyr1/cut_dataset.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def cut_data(file_path="datasets/train_first_half.parquet",
             first_out="datasets/train_1_in_4.parquet",
             second_out="datasets/train_2_in_4.parquet"):
    """Split a parquet dataset into two halves and write each half to its own file.

    Generalized from the original hard-coded paths; the defaults preserve the
    original behavior exactly.

    Args:
        file_path: parquet file to split; must contain a ``problem`` column.
        first_out: output path for the first ``len(data) // 2`` rows.
        second_out: output path for the remaining rows (gets the extra row
            when the row count is odd).
    """
    data = pd.read_parquet(file_path)

    # Sanity check that the expected column exists and looks right.
    print(data['problem'][0])

    half_size = len(data) // 2
    data_first_half = data.iloc[:half_size]
    data_second_half = data.iloc[half_size:]

    print(f"First half length: {len(data_first_half)}")
    print(f"Second half length: {len(data_second_half)}")

    data_first_half.to_parquet(first_out, index=False)
    data_second_half.to_parquet(second_out, index=False)
19
+
20
+
21
def formatted_data(file_path="datasets/train_first_half.parquet",
                   prefix="Return your final response within \\boxed{}. "):
    """Prepend an instruction prefix to every problem and save ``*_formatted.parquet``.

    Generalized from the original hard-coded values; the defaults preserve the
    original behavior exactly.

    Args:
        file_path: input parquet with a ``problem`` column. The output path is
            derived by replacing ``.parquet`` with ``_formatted.parquet``.
        prefix: text prepended to each problem statement.
    """
    data = pd.read_parquet(file_path)

    data['problem'] = data['problem'].apply(lambda x: prefix + x)

    # Show one formatted example for a quick eyeball check.
    print(data['problem'][0])

    target_path = file_path.replace(".parquet", "_formatted.parquet")
    data.to_parquet(target_path, index=False)
32
+
33
+
34
def visualize_data():
    """Print the first rows of the formatted training parquet for a quick look."""
    # Path of the formatted dataset (comment translated from Chinese).
    file_path = "datasets/train-00000-of-00001_formatted.parquet"

    # Load the data and preview it.
    frame = pd.read_parquet(file_path)
    print(frame.head())
42
+
43
+
44
if __name__ == "__main__":
    # NOTE(review): formatted_data() writes datasets/train_first_half_formatted.parquet,
    # while visualize_data() reads datasets/train-00000-of-00001_formatted.parquet —
    # these are different files, so the three steps are independent; confirm intent.
    formatted_data()
    visualize_data()
    cut_data()
easyr1/datasets/math500_RL.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1686bb35a32b22c862b4c81c4fe8b6923049f2e7c5cb71f5c0c9a1c584258f4b
3
+ size 64102
easyr1/datasets/train_RL.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d9986eea213b116bbea1668942b7849772e1f8f1a9fea249ec7a1c6c65ed10
3
+ size 1787510
easyr1/delete_checkpoints.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from watchdog.observers import Observer
4
+ from watchdog.events import FileSystemEventHandler
5
+ import time
6
+ import re
7
+
8
class CheckpointHandler(FileSystemEventHandler):
    """Prunes old ``global_step_*`` checkpoint directories under a watched folder."""

    def __init__(self, folder_path, max_checkpoints=2):
        self.folder_path = folder_path
        self.max_checkpoints = max_checkpoints

    def on_created(self, event):
        # Filesystem events are ignored; pruning is driven by the caller's
        # periodic cleanup loop instead.
        if not event.is_directory:
            return

    def cleanup_checkpoints(self):
        """Delete the oldest checkpoints, keeping the newest ones and a pinned set."""
        base = self.folder_path

        # Collect (ctime, path) for every checkpoint-style subdirectory.
        candidates = []
        for entry in os.listdir(base):
            full_path = os.path.join(base, entry)
            if not os.path.isdir(full_path):
                continue
            if re.match(r'global_step_\d+', entry):
                candidates.append((os.path.getctime(full_path), full_path))
        candidates.sort()  # oldest first

        pinned = {f"global_step_{i}" for i in [45, 90, 135, 180, 220]}  # Add more as needed

        if len(candidates) <= self.max_checkpoints:
            print(f"No need to remove any checkpoints, {len(candidates)} checkpoints exist")
            return

        # Everything except the newest max_checkpoints is eligible for removal,
        # unless it belongs to the pinned set.
        for _, checkpoint in candidates[:-self.max_checkpoints]:
            if os.path.basename(checkpoint) in pinned:
                print(f"Skipped specific checkpoint: {checkpoint}")
            else:
                shutil.rmtree(checkpoint)
                print(f"Removed old checkpoint: {checkpoint}")
42
+
43
def main(folder_path='/data/wuxinrui/easyr1_checkpoints/1_5B_TCMv2_long_short_regular_budget_modified',
         cleanup_interval=300):
    """Watch *folder_path* and periodically prune old checkpoint directories.

    Args:
        folder_path: directory containing ``global_step_*`` checkpoints.
            Change the default to your own path.
        cleanup_interval: seconds between cleanup passes. Default 300 s
            (5 minutes) — the original comment incorrectly said 30 minutes.
    """
    event_handler = CheckpointHandler(folder_path)
    observer = Observer()
    observer.schedule(event_handler, folder_path, recursive=False)
    observer.start()

    try:
        while True:
            event_handler.cleanup_checkpoints()
            time.sleep(cleanup_interval)
    except KeyboardInterrupt:
        observer.stop()
        observer.join()

if __name__ == "__main__":
    main()
easyr1/examples/8ratio_v1.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ set -x
3
+ export stage=2
4
+ export VLLM_ATTENTION_BACKEND=XFORMERS
5
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
6
+ export steady=8ratio_v1
7
+ export TENSORBOARD_DIR=tensorlog_${steady}
8
+
9
+ MODEL_PATH=/path/to/your/model
10
+ export remaining=8ratio
11
+
12
+ python3 -m verl.trainer.main \
13
+ config=examples/8ratio_v1.yaml \
14
+ worker.actor.model.model_path=${MODEL_PATH} \
15
+ trainer.n_gpus_per_node=4
easyr1/examples/8ratio_v1.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# EasyR1 GRPO training config for the 8ratio_v1 run (long-response RL).
data:
  train_files: ./datasets/train_RL.parquet
  val_files: ./datasets/math500_RL.parquet
  prompt_key: problem
  answer_key: answer
  image_key: images
  max_prompt_length: 1024
  # Long generation budget: responses up to 10k tokens.
  max_response_length: 10000
  rollout_batch_size: 256
  val_batch_size: -1
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144

algorithm:
  adv_estimator: grpo
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2

worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 4
    micro_batch_size_per_device_for_experience: 16
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      model_path: /path/to/your/model
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true # true: more CPU memory; false: more GPU memory
      offload_optimizer: true # true: more CPU memory; false: more GPU memory

  rollout:
    temperature: 1.0
    # 5 sampled responses per prompt for the GRPO group.
    n: 5
    gpu_memory_utilization: 0.8
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    val_override_config:
      # Greedy single-sample decoding for validation.
      temperature: 0.0
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: true

  reward:
    reward_type: function
    # score_function: math
    score_function: reason_with_in_limit

trainer:
  total_episodes: 8
  logger: ["console", "tensorboard"]
  project_name: 8ratio_v1
  experiment_name: 8ratio_v1
  n_gpus_per_node: 4
  nnodes: 1
  val_freq: -1 # -1 to disable
  val_before_train: false
  val_only: false
  val_generations_to_log: 1
  save_freq: 1 # -1 to disable
  save_limit: 2 # -1 to disable
  save_checkpoint_path: training/8ratio_v1
  load_checkpoint_path: null
easyr1/examples/baselines/qwen2_5_vl_3b_clevr.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ set -x
4
+
5
+ export PYTHONUNBUFFERED=1
6
+
7
+ MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
8
+
9
+ python3 -m verl.trainer.main \
10
+ config=examples/config.yaml \
11
+ data.train_files=BUAADreamer/clevr_count_70k@train \
12
+ data.val_files=BUAADreamer/clevr_count_70k@test \
13
+ data.format_prompt=./examples/format_prompt/r1v_format.jinja \
14
+ worker.actor.model.model_path=${MODEL_PATH} \
15
+ worker.rollout.tensor_parallel_size=1 \
16
+ worker.reward.reward_type=sequential \
17
+ worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
18
+ trainer.experiment_name=qwen2_5_vl_3b_clevr \
19
+ trainer.n_gpus_per_node=2
easyr1/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ set -x
4
+
5
+ export PYTHONUNBUFFERED=1
6
+
7
+ MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path
8
+
9
+ python3 -m verl.trainer.main \
10
+ config=examples/config.yaml \
11
+ data.train_files=leonardPKU/GEOQA_8K_R1V@train \
12
+ data.val_files=leonardPKU/GEOQA_8K_R1V@test \
13
+ data.format_prompt=./examples/format_prompt/r1v_format.jinja \
14
+ worker.actor.model.model_path=${MODEL_PATH} \
15
+ worker.rollout.tensor_parallel_size=1 \
16
+ worker.reward.reward_type=sequential \
17
+ worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \
18
+ trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
19
+ trainer.n_gpus_per_node=8
easyr1/examples/format_prompt/math_format.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.
easyr1/examples/format_prompt/r1v_format.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>
easyr1/examples/reward_function/math.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Dict, List
17
+
18
+ from mathruler.grader import extract_boxed_content, grade_answer
19
+
20
+
21
def format_reward(predict: str) -> float:
    """Return 1.0 iff *predict* is a <think>...</think> block followed by a \\boxed{} answer."""
    matched = re.fullmatch(r"<think>.*</think>.*\\boxed\{.*\}.*", predict, re.DOTALL)
    return 1.0 if matched is not None else 0.0
25
+
26
+
27
def accuracy_reward(predict: str, ground_truth: str) -> float:
    """Return 1.0 when the \\boxed{} content of *predict* grades equal to *ground_truth*."""
    boxed = extract_boxed_content(predict)
    return 1.0 if grade_answer(boxed, ground_truth) else 0.0
30
+
31
+
32
def compute_score(predicts: List[str], ground_truths: List[str], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Score each prediction against its ground truth.

    Returns one dict per pair with ``format``, ``accuracy``, and a weighted
    ``overall`` score (``format_weight`` on format, the rest on accuracy).
    """
    results: List[Dict[str, float]] = []
    for raw_predict, ground_truth in zip(predicts, ground_truths):
        cleaned = re.sub(r"\s*(<|>|/)\s*", r"\1", raw_predict)  # handle qwen2.5vl-32b format
        fmt = format_reward(cleaned)
        acc = accuracy_reward(cleaned, ground_truth)
        results.append(
            {
                "overall": (1 - format_weight) * acc + format_weight * fmt,
                "format": fmt,
                "accuracy": acc,
            }
        )

    return results
easyr1/examples/reward_function/r1v.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Dict
17
+
18
+ from mathruler.grader import grade_answer
19
+
20
+
21
def format_reward(predict: str) -> float:
    """Return 1.0 iff *predict* is exactly a <think> block then an <answer> block."""
    matched = re.fullmatch(r"<think>.*?</think>\s*<answer>.*?</answer>", predict, re.DOTALL)
    return 1.0 if matched is not None else 0.0
25
+
26
+
27
def accuracy_reward(predict: str, ground_truth: str) -> float:
    """Return 1.0 when the <answer> content (or the whole prediction, if no
    <answer> tag is present) grades equal to *ground_truth*."""
    try:
        tag_match = re.search(r"<answer>(.*?)</answer>", predict)
        candidate = tag_match.group(1).strip() if tag_match else predict.strip()
        return 1.0 if grade_answer(candidate, ground_truth.strip()) else 0.0
    except Exception:
        # Any grading failure counts as incorrect rather than crashing.
        return 0.0
38
+
39
+
40
def compute_score(predict: str, ground_truth: str, format_weight: float = 0.5) -> Dict[str, float]:
    """Blend format and accuracy rewards into an overall score for one sample."""
    fmt = format_reward(predict)
    acc = accuracy_reward(predict, ground_truth)
    overall = (1 - format_weight) * acc + format_weight * fmt
    return {
        "overall": overall,
        "format": fmt,
        "accuracy": acc,
    }
easyr1/pyproject.toml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "verl"
7
+ dynamic = [
8
+ "version",
9
+ "dependencies",
10
+ "optional-dependencies",
11
+ "requires-python",
12
+ "authors",
13
+ "description",
14
+ "readme",
15
+ "license"
16
+ ]
17
+
18
+ [tool.ruff]
19
+ target-version = "py39"
20
+ line-length = 119
21
+ indent-width = 4
22
+
23
+ [tool.ruff.lint]
24
+ ignore = ["C901", "E501", "E741", "W605", "C408"]
25
+ select = ["C", "E", "F", "I", "W", "RUF022"]
26
+
27
+ [tool.ruff.lint.per-file-ignores]
28
+ "__init__.py" = ["E402", "F401", "F403", "F811"]
29
+
30
+ [tool.ruff.lint.isort]
31
+ lines-after-imports = 2
32
+ known-first-party = ["verl"]
33
+ known-third-party = ["torch", "transformers", "wandb"]
34
+
35
+ [tool.ruff.format]
36
+ quote-style = "double"
37
+ indent-style = "space"
38
+ skip-magic-trailing-comma = false
39
+ line-ending = "auto"
easyr1/requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ codetiming
3
+ datasets
4
+ flash-attn>=2.4.3
5
+ liger-kernel
6
+ mathruler
7
+ numpy
8
+ omegaconf
9
+ pandas
10
+ peft
11
+ pillow
12
+ pyarrow>=15.0.0
13
+ pylatexenc
14
+ qwen-vl-utils
15
+ ray[default]
16
+ tensordict
17
+ torchdata
18
+ transformers>=4.49.0
19
+ vllm>=0.7.3
20
+ wandb
easyr1/scripts/model_merger.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import argparse
16
+ import os
17
+ import re
18
+ from concurrent.futures import ThreadPoolExecutor
19
+ from typing import Dict, List, Tuple
20
+
21
+ import torch
22
+ from torch.distributed._tensor import DTensor, Placement, Shard
23
+ from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForTokenClassification, AutoModelForVision2Seq
24
+
25
+
26
def merge_by_placement(tensors: List[torch.Tensor], placement: Placement):
    """Combine per-rank shards of one parameter according to its DTensor placement.

    Sharded tensors are concatenated along the sharding dim; replicated tensors
    collapse to a single copy. Partial placements are not supported.
    """
    if placement.is_shard():
        return torch.cat(tensors, dim=placement.dim).contiguous()
    if placement.is_replicate():
        # Every rank holds identical data — keep the first copy.
        return tensors[0]
    if placement.is_partial():
        raise NotImplementedError("Partial placement is not supported yet")
    raise ValueError(f"Unsupported placement: {placement}")
35
+
36
+
37
if __name__ == "__main__":
    # Merge FSDP-sharded checkpoint files (model_world_size_N_rank_R.pt) into a
    # single HuggingFace-format model under <local_dir>/huggingface.
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", required=True, type=str, help="The path for your saved model")
    parser.add_argument("--hf_upload_path", default=False, type=str, help="The path of the huggingface repo to upload")
    args = parser.parse_args()

    assert not args.local_dir.endswith("huggingface"), "The local_dir should not end with huggingface"
    local_dir = args.local_dir

    # copy rank zero to find the shape of (dp, fsdp)
    rank = 0
    world_size = 0
    for filename in os.listdir(local_dir):
        match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename)
        if match:
            # NOTE(review): group(1) is a *string*; it is only interpolated back
            # into filenames below, so that works, but don't do arithmetic on it.
            world_size = match.group(1)
            break
    assert world_size, "No model file with the proper format"

    state_dict = torch.load(
        os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt"), map_location="cpu"
    )
    pivot_key = sorted(state_dict.keys())[0]
    weight = state_dict[pivot_key]
    assert isinstance(weight, torch.distributed._tensor.DTensor)
    # get sharding info
    device_mesh = weight.device_mesh
    mesh = device_mesh.mesh
    mesh_dim_names = device_mesh.mesh_dim_names

    print(f"Got device mesh {mesh}, mesh_dim_names {mesh_dim_names}")

    # Only pure-FSDP meshes pass this assert, which makes the "tp" branch below
    # unreachable today; it is kept for future FSDP+TP support.
    assert mesh_dim_names in (("fsdp",),), f"Unsupported mesh_dim_names {mesh_dim_names}"

    if "tp" in mesh_dim_names:
        # fsdp * tp
        total_shards = mesh.shape[-1] * mesh.shape[-2]
        mesh_shape = (mesh.shape[-2], mesh.shape[-1])
    else:
        # fsdp
        total_shards = mesh.shape[-1]
        mesh_shape = (mesh.shape[-1],)

    print(f"Processing model shards with {total_shards} {mesh_shape} in total")

    # Slot 0 already holds rank 0's state dict; remaining slots are placeholder
    # strings overwritten in place by the loader threads below.
    model_state_dict_lst = []
    model_state_dict_lst.append(state_dict)
    model_state_dict_lst.extend([""] * (total_shards - 1))

    def process_one_shard(rank):
        # Load one rank's shard into its slot of model_state_dict_lst.
        model_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt")
        state_dict = torch.load(model_path, map_location="cpu", weights_only=False)
        model_state_dict_lst[rank] = state_dict
        return state_dict

    # Leaving the `with` block waits for every submitted load to finish.
    with ThreadPoolExecutor(max_workers=min(32, os.cpu_count())) as executor:
        for rank in range(1, total_shards):
            executor.submit(process_one_shard, rank)
    state_dict = {}
    param_placements: Dict[str, List[Placement]] = {}
    keys = set(model_state_dict_lst[0].keys())
    for key in keys:
        state_dict[key] = []
        for model_state_dict in model_state_dict_lst:
            try:
                tensor = model_state_dict.pop(key)
            except Exception:
                # NOTE(review): if pop() fails on the very first shard, `tensor`
                # is unbound below and raises NameError; on later shards the
                # previous shard's tensor would be appended again. This path is
                # effectively debug output for inconsistent shards.
                print("-" * 30)
                print(model_state_dict)
            if isinstance(tensor, DTensor):
                # Collect the local shard (cast to bf16) and remember its placement.
                state_dict[key].append(tensor._local_tensor.bfloat16())
                placements = tuple(tensor.placements)
                # replicated placement at dp dimension can be discarded
                if mesh_dim_names[0] == "dp":
                    placements = placements[1:]
                if key not in param_placements:
                    param_placements[key] = placements
                else:
                    assert param_placements[key] == placements
            else:
                # Non-DTensor entries are already full tensors; no merging needed.
                state_dict[key] = tensor.bfloat16()

    del model_state_dict_lst

    for key in sorted(state_dict):
        if not isinstance(state_dict[key], list):
            print(f"No need to merge key {key}")
            continue
        # merge shards
        placements: Tuple[Shard] = param_placements[key]
        if len(mesh_shape) == 1:
            # 1-D list, FSDP without TP
            assert len(placements) == 1
            shards = state_dict[key]
            state_dict[key] = merge_by_placement(shards, placements[0])
        else:
            # 2-D list, FSDP + TP
            raise NotImplementedError("FSDP + TP is not supported yet")

    print("Writing to local disk")
    hf_path = os.path.join(local_dir, "huggingface")
    config = AutoConfig.from_pretrained(hf_path)

    # Pick the Auto class matching the architecture recorded in the config.
    if "ForTokenClassification" in config.architectures[0]:
        auto_model = AutoModelForTokenClassification
    elif "ForCausalLM" in config.architectures[0]:
        auto_model = AutoModelForCausalLM
    elif "ForConditionalGeneration" in config.architectures[0]:
        auto_model = AutoModelForVision2Seq
    else:
        raise NotImplementedError(f"Unknown architecture {config.architectures}")

    # Build the model skeleton on the meta device (no weight memory), then
    # materialize empty CPU storage; the merged weights are passed to
    # save_pretrained via state_dict=.
    with torch.device("meta"):
        model = auto_model.from_config(config, torch_dtype=torch.bfloat16)

    model.to_empty(device="cpu")

    print(f"Saving model to {hf_path}")
    model.save_pretrained(hf_path, state_dict=state_dict)
    del state_dict
    del model
    if args.hf_upload_path:
        # Push to hugging face
        from huggingface_hub import HfApi

        api = HfApi()
        api.create_repo(repo_id=args.hf_upload_path, private=False, exist_ok=True)
        api.upload_folder(folder_path=hf_path, repo_id=args.hf_upload_path, repo_type="model")
easyr1/setup.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import re
17
+
18
+ from setuptools import find_packages, setup
19
+
20
+
21
def get_version() -> str:
    """Read ``__version__`` out of verl/__init__.py without importing the package."""
    init_path = os.path.join("verl", "__init__.py")
    with open(init_path, encoding="utf-8") as handle:
        source = handle.read()
    # Exactly one double-quoted assignment is expected; unpacking enforces that.
    (version,) = re.findall(r"__version__\W*=\W*\"([^\"]+)\"", source)
    return version
27
+
28
+
29
def get_requires() -> list[str]:
    """Parse requirements.txt into a list of requirement specifiers.

    Fixes over the original: blank lines no longer yield empty-string
    requirements, and the comment filter now runs on the *stripped* line
    (the original kept indented comment lines).
    """
    with open("requirements.txt", encoding="utf-8") as f:
        file_content = f.read()
    lines = [line.strip() for line in file_content.strip().split("\n")]
    return [line for line in lines if line and not line.startswith("#")]
34
+
35
+
36
+ extra_require = {
37
+ "dev": ["pre-commit", "ruff"],
38
+ }
39
+
40
+
41
def main():
    """Package entry point: wire static metadata plus dynamic version and
    dependency lists into setuptools."""
    setup(
        name="verl",
        version=get_version(),  # parsed from verl/__init__.py
        description="An Efficient, Scalable, Multi-Modality RL Training Framework based on veRL",
        long_description=open("README.md", encoding="utf-8").read(),
        long_description_content_type="text/markdown",
        author="verl",
        author_email="zhangchi.usc1992@bytedance.com, gmsheng@connect.hku.hk, hiyouga@buaa.edu.cn",
        license="Apache 2.0 License",
        url="https://github.com/volcengine/verl",
        package_dir={"": "."},
        packages=find_packages(where="."),
        python_requires=">=3.9.0",
        install_requires=get_requires(),  # parsed from requirements.txt
        extras_require=extra_require,
    )


if __name__ == "__main__":
    main()
easyr1/verl/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Package version. setup.py extracts this with a regex on the double-quoted
# string, so keep the exact `__version__ = "..."` form.
__version__ = "0.2.0.dev"
easyr1/verl/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (181 Bytes). View file
 
easyr1/verl/__pycache__/protocol.cpython-311.pyc ADDED
Binary file (39 kB). View file
 
easyr1/verl/models/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
easyr1/verl/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (162 Bytes). View file
 
easyr1/verl/models/__pycache__/monkey_patch.cpython-311.pyc ADDED
Binary file (1.28 kB). View file
 
easyr1/verl/models/monkey_patch.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
17
+
18
+ from .transformers.flash_attention_utils import flash_attention_forward
19
+ from .transformers.qwen2_vl import qwen2_vl_attn_forward
20
+
21
+
22
+ def apply_ulysses_patch(model_type: str) -> None:
23
+ if model_type in ("llama", "gemma", "gemma2", "mistral", "qwen2"):
24
+ ALL_ATTENTION_FUNCTIONS["flash_attention_2"] = flash_attention_forward
25
+ elif model_type in ("qwen2_vl", "qwen2_5_vl"):
26
+ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLFlashAttention2
27
+ from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLFlashAttention2
28
+
29
+ Qwen2VLFlashAttention2.forward = qwen2_vl_attn_forward
30
+ Qwen2_5_VLFlashAttention2.forward = qwen2_vl_attn_forward
31
+ else:
32
+ raise NotImplementedError(f"Model architecture {model_type} is not supported yet.")
easyr1/verl/models/transformers/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
easyr1/verl/models/transformers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (184 Bytes). View file
 
easyr1/verl/models/transformers/__pycache__/flash_attention_utils.cpython-311.pyc ADDED
Binary file (8.04 kB). View file
 
easyr1/verl/models/transformers/__pycache__/qwen2_vl.cpython-311.pyc ADDED
Binary file (9.79 kB). View file
 
easyr1/verl/models/transformers/flash_attention_utils.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 The Fairseq Authors and the HuggingFace Inc. team
2
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
3
+ # Based on https://github.com/huggingface/transformers/blob/v4.49.0/src/transformers/modeling_flash_attention_utils.py
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import inspect
18
+ import os
19
+ from typing import Optional, Tuple
20
+
21
+ import torch
22
+ import torch.distributed as dist
23
+ from transformers.modeling_flash_attention_utils import _flash_attention_forward, fa_peft_integration_check
24
+ from transformers.utils import is_flash_attn_2_available, is_flash_attn_greater_or_equal_2_10
25
+
26
+ from ...utils.ulysses import (
27
+ gather_heads_scatter_seq,
28
+ gather_seq_scatter_heads,
29
+ get_ulysses_sequence_parallel_group,
30
+ get_ulysses_sequence_parallel_world_size,
31
+ )
32
+
33
+
34
+ if is_flash_attn_2_available():
35
+ from flash_attn import flash_attn_func, flash_attn_varlen_func
36
+
37
+ _flash_supports_window_size = "window_size" in inspect.signature(flash_attn_func).parameters
38
+ _flash_supports_deterministic = "deterministic" in inspect.signature(flash_attn_func).parameters
39
+ _flash_deterministic_enabled = os.environ.get("FLASH_ATTENTION_DETERMINISTIC", "0") == "1"
40
+ _flash_use_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
41
+
42
+
43
def prepare_fa2_from_position_ids(
    query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, position_ids: torch.Tensor
):
    """Pack q/k/v for flash-attn varlen mode and derive sequence metadata.

    Tensors of shape (batch, seq, heads, head_dim) are flattened to
    (total_tokens, heads, head_dim). Sequence boundaries are taken to be the
    positions where ``position_ids`` is zero, so packed multi-sequence inputs
    (including qwen2vl mrope ids) are split correctly.

    Returns (query, key, value, indices_q, (cu_seqlens, cu_seqlens),
    (max_length, max_length)).
    """
    query = query.view(-1, query.size(-2), query.size(-1))
    key = key.contiguous().view(-1, key.size(-2), key.size(-1))
    value = value.contiguous().view(-1, value.size(-2), value.size(-1))

    flat_positions = position_ids.flatten()
    indices_q = torch.arange(flat_positions.size(0), device=flat_positions.device, dtype=torch.int32)

    # Each zero position id starts a new sequence; append the total length
    # so cu_seqlens has the usual [0, end_1, ..., total] form.
    sequence_starts = indices_q[flat_positions == 0]
    total_len = torch.tensor(flat_positions.size(), device=flat_positions.device, dtype=torch.int32)
    cu_seqlens = torch.cat((sequence_starts, total_len))

    max_length = cu_seqlens.diff().max()  # use cu_seqlens to infer max_length for qwen2vl mrope
    return (query, key, value, indices_q, (cu_seqlens, cu_seqlens), (max_length, max_length))
59
+
60
+
61
def _custom_flash_attention_forward(
    query_states: torch.Tensor,
    key_states: torch.Tensor,
    value_states: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    query_length: int,
    is_causal: bool = True,
    position_ids: Optional[torch.Tensor] = None,
    sliding_window: Optional[int] = None,
    use_top_left_mask: bool = False,
    deterministic: Optional[bool] = None,
    **kwargs,
):
    """
    Patches flash attention forward to handle 3D position ids in mrope. (3, batch_size, seq_length)

    When Ulysses sequence parallelism is active (sp_size > 1), q/k/v arrive
    sharded along the sequence dim; they are converted to full-sequence /
    sharded-heads layout before attention and restored afterwards. Packed
    inputs (non-monotonic position ids) are dispatched to varlen flash-attn.
    """
    if not use_top_left_mask:
        causal = is_causal
    else:
        # top-left-aligned causal masks (flash-attn < 2.1) need the decoding
        # special case: no causal mask when query_length == 1
        causal = is_causal and query_length != 1

    # Assuming 4D tensors, key_states.shape[1] is the key/value sequence length (source length).
    use_sliding_windows = (
        _flash_supports_window_size and sliding_window is not None and key_states.shape[1] > sliding_window
    )
    flash_kwargs = {"window_size": (sliding_window, sliding_window)} if use_sliding_windows else {}

    if _flash_supports_deterministic:
        flash_kwargs["deterministic"] = deterministic if deterministic is not None else _flash_deterministic_enabled

    if kwargs.get("softcap") is not None:
        flash_kwargs["softcap"] = kwargs.pop("softcap")

    # cast back to the flash-attn compute dtype if PEFT upcast the weights
    query_states, key_states, value_states = fa_peft_integration_check(
        query_states, key_states, value_states, target_dtype=torch.bfloat16
    )

    sp_size = get_ulysses_sequence_parallel_world_size()
    if sp_size > 1:
        # (batch_size, seq_length, num_head, head_size)
        query_states = gather_seq_scatter_heads(query_states, seq_dim=1, head_dim=2)
        key_states = gather_seq_scatter_heads(key_states, seq_dim=1, head_dim=2)
        value_states = gather_seq_scatter_heads(value_states, seq_dim=1, head_dim=2)
        position_ids_lst = [torch.empty_like(position_ids) for _ in range(sp_size)]
        # NOTE: all_gather fills position_ids_lst in place and returns an async
        # work handle (or None), NOT the gathered tensor — the previous code
        # assigned its return value to position_ids before overwriting it.
        dist.all_gather(position_ids_lst, position_ids, group=get_ulysses_sequence_parallel_group())
        position_ids = torch.cat(position_ids_lst, dim=-1)  # (..., batch_size, seq_length)

    if position_ids is not None and position_ids.dim() == 3:  # qwen2vl mrope
        # the three mrope channels share sequence boundaries; one channel suffices
        position_ids = position_ids[0]

    # non-monotonic position ids indicate several packed sequences -> varlen path
    if position_ids is not None and query_length != 1 and not (torch.diff(position_ids, dim=-1) >= 0).all():
        batch_size = query_states.size(0)
        query_states, key_states, value_states, _, cu_seq_lens, max_seq_lens = prepare_fa2_from_position_ids(
            query_states, key_states, value_states, position_ids
        )
        cu_seqlens_q, cu_seqlens_k = cu_seq_lens
        max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens
        attn_output = flash_attn_varlen_func(
            query_states,
            key_states,
            value_states,
            cu_seqlens_q=cu_seqlens_q,
            cu_seqlens_k=cu_seqlens_k,
            max_seqlen_q=max_seqlen_in_batch_q,
            max_seqlen_k=max_seqlen_in_batch_k,
            dropout_p=kwargs.pop("dropout", 0.0),
            softmax_scale=kwargs.pop("softmax_scale", None),
            causal=causal,
            **flash_kwargs,
        )
        attn_output = attn_output.view(batch_size, -1, attn_output.size(-2), attn_output.size(-1))
    else:
        attn_output = _flash_attention_forward(
            query_states,
            key_states,
            value_states,
            attention_mask,
            query_length,
            is_causal=is_causal,
            sliding_window=sliding_window,
            use_top_left_mask=use_top_left_mask,
            deterministic=deterministic,
            **kwargs,
        )  # do not pass position_ids to old flash_attention_forward

    if sp_size > 1:
        # (batch_size, seq_length, num_head, head_size)
        attn_output = gather_heads_scatter_seq(attn_output, head_dim=2, seq_dim=1)

    return attn_output
151
+
152
+
153
def flash_attention_forward(
    module: torch.nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    dropout: float = 0.0,
    scaling: Optional[float] = None,
    sliding_window: Optional[int] = None,
    softcap: Optional[float] = None,
    **kwargs,
) -> Tuple[torch.Tensor, None]:
    """Drop-in "flash_attention_2" implementation with Ulysses support.

    Inputs arrive as (batch, num_heads, seq_len, head_dim); flash-attn expects
    (batch, seq_len, num_heads, head_dim), so the tensors are transposed
    before dispatching to ``_custom_flash_attention_forward``.
    """
    # read the sequence length before transposing
    seq_len = query.shape[2]

    query, key, value = (t.transpose(1, 2) for t in (query, key, value))

    # FA2 always relies on the value set in the module, so remove it if present in kwargs to avoid passing it twice
    kwargs.pop("is_causal", None)

    attn_output = _custom_flash_attention_forward(
        query,
        key,
        value,
        attention_mask,
        query_length=seq_len,
        is_causal=True,
        dropout=dropout,
        softmax_scale=scaling,
        sliding_window=sliding_window,
        softcap=softcap,
        use_top_left_mask=_flash_use_top_left_mask,
        **kwargs,
    )

    return attn_output, None
easyr1/verl/models/transformers/qwen2_vl.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team
2
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
3
+ # Based on:
4
+ # https://github.com/huggingface/transformers/blob/v4.49.0/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ from typing import Optional, Tuple
19
+
20
+ import torch
21
+
22
+ from .flash_attention_utils import flash_attention_forward
23
+
24
+
25
+ try:
26
+ from transformers.models.qwen2_vl.modeling_qwen2_vl import (
27
+ Qwen2VLAttention,
28
+ apply_multimodal_rotary_pos_emb,
29
+ repeat_kv,
30
+ )
31
+ from transformers.models.qwen2_vl.processing_qwen2_vl import Qwen2VLProcessor
32
+ except ImportError:
33
+ pass
34
+
35
+
36
def get_rope_index(
    processor: "Qwen2VLProcessor",
    input_ids: torch.Tensor,
    image_grid_thw: Optional[torch.Tensor] = None,
    video_grid_thw: Optional[torch.Tensor] = None,
    second_per_grid_ts: Optional[torch.Tensor] = None,
    attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    Gets the position ids for Qwen2-VL, it should be generated before sharding the sequence.
    The batch dim has been removed and the input_ids should be a 1D tensor representing a single example.
    https://github.com/huggingface/transformers/blob/v4.49.0/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py#L1546

    Args:
        processor: Qwen2-VL processor, used for the spatial merge size and special token ids.
        input_ids: 1D token ids for a single example.
        image_grid_thw: per-image (t, h, w) vision grid sizes, one row per image.
        video_grid_thw: per-video (t, h, w) vision grid sizes, one row per video.
        second_per_grid_ts: seconds spanned by one temporal grid step, per video.
        attention_mask: optional mask; masked positions keep a filler position id.

    Returns:
        A (3, seq_length) tensor of mrope (temporal, height, width) position ids.
    """
    spatial_merge_size = processor.image_processor.merge_size
    tokens_per_second = 2
    image_token_id = processor.tokenizer.convert_tokens_to_ids("<|image_pad|>")
    video_token_id = processor.tokenizer.convert_tokens_to_ids("<|video_pad|>")
    vision_start_token_id = processor.tokenizer.convert_tokens_to_ids("<|vision_start|>")
    if input_ids is not None and (image_grid_thw is not None or video_grid_thw is not None):
        if attention_mask is None:
            attention_mask = torch.ones_like(input_ids)

        # masked positions keep this initial value of 1
        position_ids = torch.ones(3, input_ids.size(0), dtype=input_ids.dtype, device=input_ids.device)  # (3, seqlen)
        image_index, video_index = 0, 0
        input_ids = input_ids[attention_mask == 1]
        image_nums, video_nums = 0, 0
        # each vision segment is introduced by <|vision_start|>; the following
        # token tells whether it is an image or a video segment
        vision_start_indices = torch.argwhere(input_ids == vision_start_token_id)
        vision_tokens = input_ids[vision_start_indices + 1]
        image_nums = (vision_tokens == image_token_id).sum()
        video_nums = (vision_tokens == video_token_id).sum()
        input_tokens = input_ids.tolist()
        llm_pos_ids_list: list = []
        st = 0
        remain_images, remain_videos = image_nums, video_nums
        # walk the token list segment by segment: text chunk, then one vision block
        for _ in range(image_nums + video_nums):
            # find the nearest upcoming image/video pad token; len+1 acts as "not found"
            if image_token_id in input_tokens and remain_images > 0:
                ed_image = input_tokens.index(image_token_id, st)
            else:
                ed_image = len(input_tokens) + 1
            if video_token_id in input_tokens and remain_videos > 0:
                ed_video = input_tokens.index(video_token_id, st)
            else:
                ed_video = len(input_tokens) + 1
            if ed_image < ed_video:
                t, h, w = (
                    image_grid_thw[image_index][0],
                    image_grid_thw[image_index][1],
                    image_grid_thw[image_index][2],
                )
                # images have no temporal extent
                second_per_grid_t = 0
                image_index += 1
                remain_images -= 1
                ed = ed_image
            else:
                t, h, w = (
                    video_grid_thw[video_index][0],
                    video_grid_thw[video_index][1],
                    video_grid_thw[video_index][2],
                )
                if second_per_grid_ts is not None:
                    second_per_grid_t = second_per_grid_ts[video_index]
                else:
                    second_per_grid_t = 1.0

                video_index += 1
                remain_videos -= 1
                ed = ed_video

            # h/w are divided by the merge size: position ids address merged patches
            llm_grid_t, llm_grid_h, llm_grid_w = (
                t.item(),
                h.item() // spatial_merge_size,
                w.item() // spatial_merge_size,
            )
            text_len = ed - st

            # text tokens get identical t/h/w ids, continuing after the previous block
            st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
            llm_pos_ids_list.append(torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx)

            # vision tokens: temporal id scaled to real time, spatial ids per grid cell
            t_index = torch.arange(llm_grid_t).view(-1, 1).expand(-1, llm_grid_h * llm_grid_w)
            t_index = (t_index * second_per_grid_t * tokens_per_second).long().flatten()
            h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand(llm_grid_t, -1, llm_grid_w).flatten()
            w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand(llm_grid_t, llm_grid_h, -1).flatten()
            llm_pos_ids_list.append(torch.stack([t_index, h_index, w_index]) + text_len + st_idx)
            st = ed + llm_grid_t * llm_grid_h * llm_grid_w

        # trailing text after the last vision block
        if st < len(input_tokens):
            st_idx = llm_pos_ids_list[-1].max() + 1 if len(llm_pos_ids_list) > 0 else 0
            text_len = len(input_tokens) - st
            llm_pos_ids_list.append(torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx)

        # scatter the computed ids back into the unmasked positions
        llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
        position_ids[..., attention_mask == 1] = llm_positions.to(position_ids.device)
    else:
        # text-only example: plain sequential ids, identical across the 3 channels
        if attention_mask is not None:
            position_ids = attention_mask.long().cumsum(-1) - 1
            position_ids.masked_fill_(attention_mask == 0, 1)
            position_ids = position_ids.unsqueeze(0).expand(3, -1).to(input_ids.device)
        else:
            position_ids = torch.arange(input_ids.shape[1], device=input_ids.device).view(1, -1).expand(3, -1)

    return position_ids
137
+
138
+
139
def qwen2_vl_attn_forward(
    self: "Qwen2VLAttention",
    hidden_states: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,  # will become mandatory in v4.46
    **kwargs,
) -> Tuple[torch.Tensor, None, None]:
    """Replacement forward for Qwen2-VL flash attention.

    Monkey-patched onto Qwen2VLFlashAttention2 / Qwen2_5_VLFlashAttention2 by
    ``apply_ulysses_patch``. Routes attention through the Ulysses-aware
    ``flash_attention_forward`` and, importantly, forwards the 3D mrope
    position ids so packed sequences are handled correctly.
    """
    bsz, q_len, _ = hidden_states.size()  # q_len = seq_length / sp_size
    query_states = self.q_proj(hidden_states)  # (batch_size, seq_length / sp_size, num_heads * head_size)
    key_states = self.k_proj(hidden_states)
    value_states = self.v_proj(hidden_states)

    # reshape to (batch, heads, seq, head_dim) for rotary embedding application
    query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
    key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
    value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)

    # Because the input can be padded, the absolute sequence length depends on the max position id.
    if position_embeddings is None:
        cos, sin = self.rotary_emb(value_states, position_ids)
    else:
        cos, sin = position_embeddings

    query_states, key_states = apply_multimodal_rotary_pos_emb(
        query_states, key_states, cos, sin, self.rope_scaling["mrope_section"]
    )
    # expand KV heads to match the number of query heads (grouped-query attention)
    key_states = repeat_kv(key_states, self.num_key_value_groups)
    value_states = repeat_kv(value_states, self.num_key_value_groups)
    dropout_rate = 0.0 if not self.training else self.attention_dropout

    # sliding-window attention only applies from max_window_layers onward when enabled
    sliding_window = None
    if (
        self.config.use_sliding_window
        and getattr(self.config, "sliding_window", None) is not None
        and self.layer_idx >= self.config.max_window_layers
    ):
        sliding_window = self.config.sliding_window

    attn_output, _ = flash_attention_forward(
        self,
        query_states,
        key_states,
        value_states,
        attention_mask,
        dropout=dropout_rate,
        sliding_window=sliding_window,
        position_ids=position_ids,  # important: pass position ids
    )  # (batch_size, seq_length, num_head / sp_size, head_size)
    attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
    attn_output = self.o_proj(attn_output)
    return attn_output, None, None
easyr1/verl/protocol.py ADDED
@@ -0,0 +1,705 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Implement base data transfer protocol between any two functions, modules.
16
+ We can subclass Protocol to define more detailed batch info with specific keys
17
+ """
18
+
19
+ import copy
20
+ import io
21
+ import pickle
22
+ from collections import defaultdict
23
+ from dataclasses import dataclass, field
24
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
25
+
26
+ import numpy as np
27
+ import ray
28
+ import torch
29
+ from numpy.typing import NDArray
30
+ from tensordict import TensorDict
31
+ from torch.distributed import ProcessGroup
32
+ from torch.utils.data import DataLoader
33
+
34
+ from .utils.py_functional import union_two_dict
35
+
36
+
37
+ try:
38
+ import tensordict
39
+
40
+ tensordict.set_lazy_legacy(False).set()
41
+ except Exception:
42
+ pass
43
+
44
+
45
+ __all__ = ["DataProto", "union_tensor_dict"]
46
+
47
+
48
def pad_dataproto_to_divisor(data: "DataProto", size_divisor: int) -> Tuple["DataProto", int]:
    """Pad a DataProto by repeating its own items until its length is a
    multiple of ``size_divisor``.

    Args:
        data (DataProto): the unpadded DataProto
        size_divisor (int): size divisor

    Returns:
        data (DataProto): the padded DataProto
        pad_size (int): number of padded items, for later ``unpad_dataproto``
    """
    assert isinstance(data, DataProto), "data must be a DataProto"

    pad_size = (-len(data)) % size_divisor
    if pad_size == 0:
        return data, 0

    # repeat prefixes of the data itself until pad_size items are accumulated
    # (pad_size may exceed len(data) when size_divisor > len(data))
    chunks = [data]
    remaining = pad_size
    while remaining > 0:
        take = min(remaining, len(data))
        chunks.append(data[:take])
        remaining -= take

    return DataProto.concat(chunks), pad_size
75
+
76
+
77
def unpad_dataproto(data: "DataProto", pad_size: int) -> "DataProto":
    """Drop the trailing ``pad_size`` items added by ``pad_dataproto_to_divisor``."""
    if pad_size == 0:
        return data
    return data[:-pad_size]
82
+
83
+
84
+ def union_tensor_dict(tensor_dict1: TensorDict, tensor_dict2: TensorDict) -> TensorDict:
85
+ """Union two tensordicts."""
86
+ if tensor_dict1.batch_size != tensor_dict2.batch_size:
87
+ raise ValueError(
88
+ f"Two tensor dict must have identical batch size. Got {tensor_dict1.batch_size} and {tensor_dict2.batch_size}"
89
+ )
90
+
91
+ for key in tensor_dict2.keys():
92
+ if key in tensor_dict1 and not torch.equal(tensor_dict1[key], tensor_dict2[key]):
93
+ raise ValueError(f"Key already exists: {key}.")
94
+
95
+ tensor_dict1[key] = tensor_dict2[key]
96
+
97
+ return tensor_dict1
98
+
99
+
100
def union_numpy_dict(tensor_dict1: Dict[str, NDArray], tensor_dict2: Dict[str, NDArray]) -> Dict[str, NDArray]:
    """Merge a dict of numpy arrays into another in place and return the first.

    A key present in both dicts must map to element-wise equal arrays,
    otherwise a ValueError is raised.
    """
    for key, array in tensor_dict2.items():
        if key in tensor_dict1:
            assert isinstance(array, np.ndarray)
            assert isinstance(tensor_dict1[key], np.ndarray)
            # identical duplicates are tolerated; conflicting values are not
            if not np.all(tensor_dict1[key] == array):
                raise ValueError(f"Key already exists: {key}.")

        tensor_dict1[key] = array

    return tensor_dict1
111
+
112
+
113
def batch_collate(features: List[Dict[str, Any]]) -> Dict[str, List[Any]]:
    """Transpose a list of feature dicts into a dict of feature lists."""
    if not features:
        return {}

    collated = defaultdict(list)
    for example in features:
        for name, value in example.items():
            collated[name].append(value)

    return collated
123
+
124
+
125
def fold_batch_dim(data: "DataProto", new_batch_size: int):
    """
    Fold a batch dim from [bsz, xxx] into [new_bsz, bsz // new_bsz, xxx]

    Args:
        data: the DataProto to fold; its batch size must be divisible by
            ``new_batch_size``.
        new_batch_size: the leading batch size after folding.

    Returns:
        DataProto: a folded DataProto with ``meta_info`` carried over.
    """
    batch_size = data.batch.batch_size[0]

    assert batch_size % new_batch_size == 0

    tensor: TensorDict = data.batch
    non_tensor = data.non_tensor_batch

    tensor = tensor.view(new_batch_size, -1)
    tensor.auto_batch_size_(batch_dims=1)

    for key, val in non_tensor.items():
        # pass the target shape positionally: the `newshape` keyword of
        # np.reshape is deprecated (renamed to `shape` in NumPy 2.1)
        non_tensor[key] = np.reshape(val, (new_batch_size, -1, *val.shape[1:]))

    return DataProto(batch=tensor, non_tensor_batch=non_tensor, meta_info=data.meta_info)
143
+
144
+
145
def collate_fn(data_items: list["DataProtoItem"]):
    """Collate a list of DataProtoItems (e.g. from a DataLoader) into one DataProto."""
    tensor_parts = [item.batch for item in data_items]
    non_tensor_parts = [item.non_tensor_batch for item in data_items]

    stacked = torch.stack(tensor_parts).contiguous()
    collated = batch_collate(non_tensor_parts)
    # non-tensor values are kept as object arrays so heterogeneous entries survive
    collated = {name: np.array(values, dtype=object) for name, values in collated.items()}
    return DataProto(batch=stacked, non_tensor_batch=collated)
156
+
157
+
158
@dataclass
class DataProtoItem:
    """A single example taken out of a DataProto: one TensorDict row plus the
    matching non-tensor entries and the shared meta info. Produced by
    ``DataProto.__getitem__`` with an integer index."""

    batch: Optional[TensorDict] = None
    non_tensor_batch: Dict[str, NDArray] = field(default_factory=dict)
    meta_info: Dict[str, Any] = field(default_factory=dict)
163
+
164
+
165
+ @dataclass
166
+ class DataProto:
167
+ """
168
+ A DataProto is a data structure that aims to provide a standard protocol for data exchange between functions.
169
+ It contains a batch (TensorDict) and a meta_info (Dict). The batch is a TensorDict https://pytorch.org/tensordict/.
170
+ TensorDict allows you to manipulate a dictionary of Tensors like a single Tensor. Ideally, the tensors with the
171
+ same batch size should be put inside batch.
172
+ """
173
+
174
+ batch: Optional[TensorDict] = None
175
+ non_tensor_batch: Dict[str, NDArray] = field(default_factory=dict)
176
+ meta_info: Dict[str, Any] = field(default_factory=dict)
177
+
178
    def __post_init__(self):
        # Validate tensor/non-tensor alignment right after construction so a
        # malformed DataProto fails fast instead of deep inside training code.
        self.check_consistency()  # perform necessary checking
180
+
181
+ def __len__(self) -> int:
182
+ if self.batch is not None:
183
+ return self.batch.batch_size[0]
184
+ elif self.non_tensor_batch is not None and len(self.non_tensor_batch) > 0:
185
+ random_key = list(self.non_tensor_batch.keys())[0]
186
+ return self.non_tensor_batch[random_key].shape[0]
187
+ else:
188
+ return 0
189
+
190
+ def __getitem__(self, item: Union[int, slice]) -> Union["DataProto", "DataProtoItem"]:
191
+ tensor_data = self.batch[item]
192
+ non_tensor_data = {key: val[item] for key, val in self.non_tensor_batch.items()}
193
+ return_type = DataProto if isinstance(item, slice) else DataProtoItem
194
+ return return_type(batch=tensor_data, non_tensor_batch=non_tensor_data, meta_info=self.meta_info)
195
+
196
+ # def __getitem__(self, item: Union[int, slice, list, torch.Tensor]) -> "DataProto":
197
+ # # NOTE: alternative __getitem__ suggested by GPT (kept for reference, unused)
198
+ # """
199
+ # Returns a new DataProto subset regardless of index type (int, slice, list, tensor).
200
+ # Always returns a DataProto, never a DataProtoItem to avoid errors in downstream.
201
+ # """
202
+ # if isinstance(item, int):
203
+ # # convert to slice to ensure output is still DataProto
204
+ # item = slice(item, item + 1)
205
+ # elif isinstance(item, torch.Tensor):
206
+ # if item.ndim == 0: # scalar tensor
207
+ # item = slice(int(item.item()), int(item.item()) + 1)
208
+ # tensor_data = self.batch[item]
209
+ # non_tensor_data = {key: val[item] for key, val in self.non_tensor_batch.items()}
210
+ # return DataProto(batch=tensor_data, non_tensor_batch=non_tensor_data, meta_info=self.meta_info)
211
+
212
+ def __getstate__(self) -> Tuple[bytes, Dict[str, NDArray], Dict[str, Any]]:
213
+ buffer = io.BytesIO()
214
+ if self.batch is not None:
215
+ self.batch: TensorDict = self.batch.contiguous()
216
+ self.batch: TensorDict = self.batch.consolidate()
217
+
218
+ torch.save(self.batch, buffer)
219
+ buffer_bytes = buffer.getvalue()
220
+ return buffer_bytes, self.non_tensor_batch, self.meta_info
221
+
222
+ def __setstate__(self, data: Tuple[bytes, Dict[str, NDArray], Dict[str, Any]]) -> None:
223
+ batch_deserialized_bytes, non_tensor_batch, meta_info = data
224
+ batch_deserialized = io.BytesIO(batch_deserialized_bytes)
225
+ batch = torch.load(batch_deserialized, weights_only=False, map_location="cpu")
226
+ self.batch = batch
227
+ self.non_tensor_batch = non_tensor_batch
228
+ self.meta_info = meta_info
229
+
230
    def save_to_disk(self, filepath: str) -> None:
        """Pickle this DataProto to ``filepath`` (see ``__getstate__`` for the wire format)."""
        with open(filepath, "wb") as f:
            pickle.dump(self, f)
233
+
234
    @staticmethod
    def load_from_disk(filepath: str) -> "DataProto":
        """Load a DataProto previously written by ``save_to_disk``.

        NOTE(review): this uses pickle; only load files from trusted sources.
        """
        with open(filepath, "rb") as f:
            data = pickle.load(f)
            return data
239
+
240
+ def print_size(self, prefix: str = "") -> None:
241
+ size_of_tensordict = 0
242
+ for tensor in self.batch.values():
243
+ if isinstance(tensor, torch.Tensor):
244
+ size_of_tensordict += tensor.element_size() * tensor.numel()
245
+
246
+ size_of_numpy_array = 0
247
+ for value in self.non_tensor_batch.values():
248
+ size_of_numpy_array += value.nbytes
249
+
250
+ size_of_numpy_array /= 1024**3
251
+ size_of_tensordict /= 1024**3
252
+
253
+ message = f"Size of tensordict: {size_of_tensordict} GB, size of non_tensor_batch: {size_of_numpy_array} GB."
254
+ print({prefix}, {message})
255
+
256
+ def check_consistency(self):
257
+ """Check the consistency of the DataProto. Mainly for batch and non_tensor_batch
258
+ We expose this function as a public one so that user can call themselves directly
259
+ """
260
+ if self.batch is not None:
261
+ assert len(self.batch.batch_size) == 1, "only support num_batch_dims=1"
262
+
263
+ if self.batch is not None and len(self.non_tensor_batch) != 0:
264
+ # TODO: we can actually lift this restriction if needed
265
+ assert len(self.batch.batch_size) == 1, "only support num_batch_dims=1 when non_tensor_batch is not empty."
266
+
267
+ batch_size = self.batch.batch_size[0]
268
+ for key, val in self.non_tensor_batch.items():
269
+ assert len(val) == batch_size, f"key {key} length {len(val)} is not equal to batch size {batch_size}."
270
+
271
+ @classmethod
272
+ def from_single_dict(
273
+ cls,
274
+ data: Dict[str, Union[torch.Tensor, NDArray]],
275
+ meta_info: Optional[Dict[str, Any]] = None,
276
+ ) -> "DataProto":
277
+ tensors = {}
278
+ non_tensors = {}
279
+ for key, value in data.items():
280
+ if isinstance(value, torch.Tensor):
281
+ tensors[key] = value
282
+ elif isinstance(value, np.ndarray):
283
+ non_tensors[key] = value
284
+ else:
285
+ raise ValueError(f"Unsupported type in data {type(value)}")
286
+
287
+ return DataProto.from_dict(tensors=tensors, non_tensors=non_tensors, meta_info=meta_info)
288
+
289
+ @classmethod
290
+ def from_dict(
291
+ cls,
292
+ tensors: Dict[str, torch.Tensor],
293
+ non_tensors: Dict[str, NDArray] = None,
294
+ meta_info: Optional[Dict[str, Any]] = None,
295
+ num_batch_dims: int = 1,
296
+ ) -> "DataProto":
297
+ """Create a DataProto from a dict of tensors. This assumes that
298
+ 1. All the tensor in tensors have the same dim0
299
+ 2. Only dim0 is the batch dim
300
+ """
301
+ assert len(tensors) > 0, "tensors must not be empty"
302
+ assert num_batch_dims > 0, "num_batch_dims must be greater than zero"
303
+ if non_tensors is not None:
304
+ assert num_batch_dims == 1, "only support num_batch_dims=1 when non_tensors is not None."
305
+
306
+ meta_info = meta_info or {}
307
+ non_tensors = non_tensors or {}
308
+ assert isinstance(non_tensors, dict), "non_tensors should be a dictionary."
309
+
310
+ # get and check batch size
311
+ batch_size = None
312
+ pivot_key = None
313
+ for key, tensor in tensors.items():
314
+ if batch_size is None:
315
+ batch_size = tensor.shape[:num_batch_dims]
316
+ pivot_key = key
317
+ else:
318
+ current_batch = tensor.shape[:num_batch_dims]
319
+ assert batch_size == current_batch, (
320
+ f"Not all the tensor in tensors have the same batch size with batch_dims={num_batch_dims}. "
321
+ f"Got {pivot_key} has {batch_size}, {key} has {current_batch}"
322
+ )
323
+
324
+ tensor_dict = TensorDict(source=tensors, batch_size=batch_size)
325
+ return cls(batch=tensor_dict, non_tensor_batch=non_tensors, meta_info=meta_info)
326
+
327
+ def to(self, device: torch.device) -> "DataProto":
328
+ """move the batch to device
329
+
330
+ Args:
331
+ device (torch.device, str): torch device
332
+
333
+ Returns:
334
+ DataProto: the current DataProto
335
+
336
+ """
337
+ if self.batch is not None:
338
+ self.batch = self.batch.to(device)
339
+
340
+ return self
341
+
342
+ def select(
343
+ self,
344
+ batch_keys: Optional[List[str]] = None,
345
+ non_tensor_batch_keys: Optional[List[str]] = None,
346
+ meta_info_keys: Optional[List[str]] = None,
347
+ deepcopy: bool = False,
348
+ ) -> "DataProto":
349
+ """Select a subset of the DataProto via batch_keys and meta_info_keys
350
+
351
+ Args:
352
+ batch_keys (list, optional): a list of strings indicating the keys in batch to select
353
+ meta_info_keys (list, optional): a list of keys indicating the meta info to select
354
+
355
+ Returns:
356
+ DataProto: the DataProto with the selected batch_keys and meta_info_keys
357
+ """
358
+ # TODO (zhangchi.usc1992) whether to copy
359
+ if batch_keys is not None:
360
+ batch_keys = tuple(batch_keys)
361
+ sub_batch = self.batch.select(*batch_keys)
362
+ else:
363
+ sub_batch = self.batch
364
+
365
+ if non_tensor_batch_keys is not None:
366
+ non_tensor_batch = {k: v for k, v in self.non_tensor_batch.items() if k in non_tensor_batch_keys}
367
+ else:
368
+ non_tensor_batch = self.non_tensor_batch
369
+
370
+ if deepcopy:
371
+ non_tensor_batch = copy.deepcopy(non_tensor_batch)
372
+
373
+ if meta_info_keys is not None:
374
+ sub_meta_info = {k: v for k, v in self.meta_info.items() if k in meta_info_keys}
375
+ else:
376
+ sub_meta_info = self.meta_info
377
+
378
+ if deepcopy:
379
+ sub_meta_info = copy.deepcopy(sub_meta_info)
380
+
381
+ return DataProto(batch=sub_batch, non_tensor_batch=non_tensor_batch, meta_info=sub_meta_info)
382
+
383
+ def pop(
384
+ self,
385
+ batch_keys: Optional[List[str]] = None,
386
+ non_tensor_batch_keys: Optional[List[str]] = None,
387
+ meta_info_keys: Optional[List[str]] = None,
388
+ ) -> "DataProto":
389
+ """Pop a subset of the DataProto via `batch_keys` and `meta_info_keys`
390
+
391
+ Args:
392
+ batch_keys (list, optional): a list of strings indicating the keys in batch to pop
393
+ meta_info_keys (list, optional): a list of keys indicating the meta info to pop
394
+
395
+ Returns:
396
+ DataProto: the DataProto with the poped batch_keys and meta_info_keys
397
+ """
398
+ assert batch_keys is not None
399
+ non_tensor_batch_keys = non_tensor_batch_keys or []
400
+ meta_info_keys = meta_info_keys or []
401
+
402
+ tensors = {}
403
+ for key in batch_keys:
404
+ tensors[key] = self.batch.pop(key)
405
+
406
+ non_tensors = {}
407
+ for key in non_tensor_batch_keys:
408
+ non_tensors[key] = self.non_tensor_batch.pop(key)
409
+
410
+ meta_info = {}
411
+ for key in meta_info_keys:
412
+ meta_info[key] = self.meta_info.pop(key)
413
+
414
+ return DataProto.from_dict(tensors=tensors, non_tensors=non_tensors, meta_info=meta_info)
415
+
416
+ def rename(
417
+ self, old_keys: Optional[Union[str, List[str]]] = None, new_keys: Optional[Union[str, List[str]]] = None
418
+ ) -> "DataProto":
419
+ """
420
+ Note that this function only rename the key in the batch
421
+ """
422
+
423
+ def validate_input(keys):
424
+ if keys is not None:
425
+ if isinstance(keys, str):
426
+ keys = [keys]
427
+ elif isinstance(keys, list):
428
+ pass
429
+ else:
430
+ raise TypeError(f"keys must be a list or a string, but got {type(keys)}")
431
+ return keys
432
+
433
+ old_keys = validate_input(old_keys)
434
+ new_keys = validate_input(new_keys)
435
+
436
+ if len(new_keys) != len(old_keys):
437
+ raise ValueError(
438
+ f"new_keys and old_keys must have the same length, but got {len(new_keys)} and {len(old_keys)}"
439
+ )
440
+
441
+ self.batch.rename_key_(tuple(old_keys), tuple(new_keys))
442
+
443
+ return self
444
+
445
+ def union(self, other: "DataProto") -> "DataProto":
446
+ """Union with another DataProto. Union batch and meta_info separately.
447
+ Throw an error if
448
+ - there are conflict keys in batch and they are not equal
449
+ - the batch size of two data batch is not the same
450
+ - there are conflict keys in meta_info and they are not the same.
451
+
452
+ Args:
453
+ other (DataProto): another DataProto to union
454
+
455
+ Returns:
456
+ DataProto: the DataProto after union
457
+ """
458
+ self.batch = union_tensor_dict(self.batch, other.batch)
459
+ self.non_tensor_batch = union_numpy_dict(self.non_tensor_batch, other.non_tensor_batch)
460
+ self.meta_info = union_two_dict(self.meta_info, other.meta_info)
461
+ return self
462
+
463
    def make_iterator(
        self, mini_batch_size: int, epochs: int, seed: Optional[int] = None, dataloader_kwargs: Optional[Dict[str, Any]] = None
    ):
        """Make an iterator from the DataProto. This is built upon that TensorDict can be used as a normal Pytorch
        dataset. See https://pytorch.org/tensordict/tutorials/data_fashion for more details.

        Args:
            mini_batch_size (int): mini-batch size when iterating the dataset. We require that
                ``batch.batch_size[0] % mini_batch_size == 0``
            epochs (int): number of epochs when iterating the dataset.
            seed (int, optional): seeds the sampling generator for a reproducible iteration order.
            dataloader_kwargs: internally, it returns a DataLoader over the batch.
                The dataloader_kwargs is the kwargs passed to the DataLoader

        Returns:
            Iterator: an iterator that yields a mini-batch data at a time. The total number of iteration steps is
            ``self.batch.batch_size * epochs // mini_batch_size``
        """
        assert self.batch.batch_size[0] % mini_batch_size == 0, f"{self.batch.batch_size[0]} % {mini_batch_size} != 0"
        # we can directly create a dataloader from TensorDict
        if dataloader_kwargs is None:
            dataloader_kwargs = {}

        # A seeded generator makes shuffling (if requested via dataloader_kwargs) deterministic.
        if seed is not None:
            generator = torch.Generator()
            generator.manual_seed(seed)
        else:
            generator = None

        assert isinstance(dataloader_kwargs, Dict)
        train_dataloader = DataLoader(
            dataset=self, batch_size=mini_batch_size, collate_fn=collate_fn, generator=generator, **dataloader_kwargs
        )

        def get_data():
            # Every yielded mini-batch shares the parent's meta_info.
            for _ in range(epochs):
                for d in train_dataloader:
                    d.meta_info = self.meta_info
                    yield d

        return iter(get_data())
503
+
504
+ def chunk(self, chunks: int) -> List["DataProto"]:
505
+ """Split the batch among dim=0 into chunks. The meta_info is passed to each DataProto after split.
506
+
507
+ Args:
508
+ chunks (int): the number of chunks to split on dim=0
509
+
510
+ Returns:
511
+ List[DataProto]: a list of DataProto after splitting
512
+ """
513
+ assert len(self) % chunks == 0, (
514
+ f"only support equal chunk. Got size of DataProto {len(self)} and chunk {chunks}."
515
+ )
516
+ if self.batch is not None:
517
+ batch_lst = self.batch.chunk(chunks=chunks, dim=0)
518
+ else:
519
+ batch_lst = [None for _ in range(chunks)]
520
+
521
+ non_tensor_batch_lst = [{} for _ in range(chunks)]
522
+ for key, value in self.non_tensor_batch.items():
523
+ assert isinstance(value, np.ndarray)
524
+ non_tensor_lst = np.array_split(value, chunks)
525
+ assert len(non_tensor_lst) == chunks
526
+ for i in range(chunks):
527
+ non_tensor_batch_lst[i][key] = non_tensor_lst[i]
528
+
529
+ output = []
530
+ for i in range(chunks):
531
+ output.append(
532
+ DataProto(batch=batch_lst[i], non_tensor_batch=non_tensor_batch_lst[i], meta_info=self.meta_info)
533
+ )
534
+
535
+ return output
536
+
537
+ def split(self, split_size: int) -> List["DataProto"]:
538
+ chunks = len(self) // split_size
539
+ return self.chunk(chunks)
540
+
541
+ @staticmethod
542
+ def concat(data: List["DataProto"]) -> "DataProto":
543
+ """Concat a list of DataProto. The batch is concatenated among dim=0.
544
+ The meta_info is assumed to be identical and will use the first one.
545
+
546
+ Args:
547
+ data (List[DataProto]): list of DataProto
548
+
549
+ Returns:
550
+ DataProto: concatenated DataProto
551
+ """
552
+ batch_lst = [batch.batch for batch in data]
553
+ if batch_lst[0] is not None:
554
+ new_batch = torch.cat(batch_lst, dim=0)
555
+ else:
556
+ new_batch = None
557
+
558
+ non_tensor_batch = batch_collate([d.non_tensor_batch for d in data])
559
+ for key, value in non_tensor_batch.items():
560
+ non_tensor_batch[key] = np.concatenate(value, axis=0)
561
+
562
+ return DataProto(batch=new_batch, non_tensor_batch=non_tensor_batch, meta_info=data[0].meta_info)
563
+
564
+ def reorder(self, indices: torch.Tensor) -> None:
565
+ """
566
+ Note that this operation is in-place
567
+ """
568
+ indices_np = indices.detach().numpy()
569
+ self.batch = self.batch[indices]
570
+ self.non_tensor_batch = {key: val[indices_np] for key, val in self.non_tensor_batch.items()}
571
+
572
+ def repeat(self, repeat_times: int = 2, interleave: bool = True) -> "DataProto":
573
+ """
574
+ Repeat the batch data a specified number of times.
575
+
576
+ Args:
577
+ repeat_times (int): Number of times to repeat the data.
578
+ interleave (bool): Whether to interleave the repeated data.
579
+
580
+ Returns:
581
+ DataProto: A new DataProto with repeated data.
582
+ """
583
+ if self.batch is not None:
584
+ if interleave:
585
+ # Interleave the data
586
+ repeated_tensors = {
587
+ key: tensor.repeat_interleave(repeat_times, dim=0) for key, tensor in self.batch.items()
588
+ }
589
+ else:
590
+ # Stack the data
591
+ repeated_tensors = {
592
+ key: tensor.unsqueeze(0).expand(repeat_times, *tensor.shape).reshape(-1, *tensor.shape[1:])
593
+ for key, tensor in self.batch.items()
594
+ }
595
+
596
+ repeated_batch = TensorDict(
597
+ source=repeated_tensors,
598
+ batch_size=(self.batch.batch_size[0] * repeat_times,),
599
+ )
600
+ else:
601
+ repeated_batch = None
602
+
603
+ repeated_non_tensor_batch = {}
604
+ for key, value in self.non_tensor_batch.items():
605
+ if interleave:
606
+ repeated_non_tensor_batch[key] = np.repeat(value, repeat_times, axis=0)
607
+ else:
608
+ repeated_non_tensor_batch[key] = np.tile(value, (repeat_times,) + (1,) * (value.ndim - 1))
609
+
610
+ return DataProto(
611
+ batch=repeated_batch,
612
+ non_tensor_batch=repeated_non_tensor_batch,
613
+ meta_info=self.meta_info,
614
+ )
615
+
616
+
617
@dataclass
class DataProtoFuture:
    """
    DataProtoFuture aims to eliminate actual data fetching on driver. By doing so, the driver doesn't have to wait
    for data so that asynchronous execution becomes possible.
    DataProtoFuture contains a list of futures from another WorkerGroup of size world_size.
    - collect_fn is a Callable that reduces the list of futures to a DataProto
    - dispatch_fn is a Callable that partitions the DataProto into a list of DataProto of size world_size and then select

    Potential issue: we can optimize dispatch_fn(collect_fn) such that only needed data is fetched on destination
    - DataProtoFuture only supports directly passing from the output of a method to another input. You can't perform any
    operation on the DataProtoFuture in driver.
    """

    # Reduces the list of fetched DataProto objects into a single one (e.g. DataProto.concat).
    collect_fn: Callable
    # One Ray future per worker in the producing WorkerGroup.
    futures: List[ray.ObjectRef]
    # Optional post-collect selector (e.g. pick chunk i of the concatenated result).
    dispatch_fn: Callable = None

    @staticmethod
    def concat(data: List[ray.ObjectRef]) -> "DataProtoFuture":
        """Wrap a list of futures so they are concatenated lazily on ``get``."""
        output = DataProtoFuture(collect_fn=DataProto.concat, futures=data)
        return output

    def chunk(self, chunks: int) -> List["DataProtoFuture"]:
        """Lazily split into ``chunks`` futures; piece i selects chunk i after collect."""
        from functools import partial

        arg_future_lst = []
        for i in range(chunks):
            # note that we can't directly pass i and chunks
            # (late-binding closure: partial pins the per-iteration values)
            def dispatch_fn(x, i, chunks):
                return x.chunk(chunks=chunks)[i]

            arg_future = DataProtoFuture(
                collect_fn=self.collect_fn, dispatch_fn=partial(dispatch_fn, i=i, chunks=chunks), futures=self.futures
            )
            arg_future_lst.append(arg_future)
        return arg_future_lst

    def get(self):
        """Block on all futures, collect them into one DataProto, then apply dispatch_fn if set."""
        outputs = ray.get(self.futures)  # dp_size.
        for output in outputs:
            assert isinstance(output, DataProto)

        outputs = self.collect_fn(outputs)  # select dp, concat
        if self.dispatch_fn is not None:
            outputs = self.dispatch_fn(outputs)  # split in batch dim, select using dp

        return outputs
665
+
666
+
667
def allgather_dict_tensors(
    tensors: Union[Dict[str, torch.Tensor], TensorDict], size: int, group: ProcessGroup, dim: int = 0
) -> Union[Dict[str, torch.Tensor], TensorDict]:
    """All-gather every tensor across ``group`` and concatenate along ``dim``.

    Accepts either a plain dict or a TensorDict and returns the same kind.
    Keys are visited in sorted order so that every rank issues the
    collectives in the same sequence.

    TODO: optimize this.
    - We can use async ops
    - We can use only one allgather
    """
    came_as_tensordict = isinstance(tensors, TensorDict)
    plain = tensors.to_dict() if came_as_tensordict else tensors

    gathered = {}
    for name in sorted(plain.keys()):
        local = plain[name]
        shards = [torch.empty_like(local) for _ in range(size)]
        torch.distributed.all_gather(shards, local, group=group, async_op=False)
        gathered[name] = torch.cat(shards, dim=dim)

    if came_as_tensordict:
        return TensorDict(source=gathered, batch_size=tensors.batch_size[0] * size)
    return gathered
694
+
695
+
696
def all_gather_data_proto(data: DataProto, size: int, group: ProcessGroup) -> None:
    """All-gather a DataProto across ``group``, growing its batch dim by ``size``, in place."""
    # Note that this is an inplace operator just like torch.distributed.all_gather
    prev_device = data.batch.device
    # Collectives need the tensors on the current CUDA device; restore afterwards.
    data.batch = data.batch.cuda(device=torch.cuda.current_device())
    data.batch = allgather_dict_tensors(data.batch.contiguous(), size=size, group=group, dim=0)
    data.batch = data.batch.to(prev_device)
    # all gather non_tensor_batch
    all_non_tensor_batch = [None for _ in range(size)]
    torch.distributed.all_gather_object(all_non_tensor_batch, data.non_tensor_batch, group=group)
    data.non_tensor_batch = {k: np.concatenate([d[k] for d in all_non_tensor_batch]) for k in data.non_tensor_batch}
easyr1/verl/single_controller/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
easyr1/verl/single_controller/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes). View file
 
easyr1/verl/single_controller/base/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .worker import Worker
16
+ from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup
17
+
18
+
19
+ __all__ = ["ClassWithInitArgs", "ResourcePool", "Worker", "WorkerGroup"]
easyr1/verl/single_controller/base/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (409 Bytes). View file
 
easyr1/verl/single_controller/base/__pycache__/decorator.cpython-311.pyc ADDED
Binary file (10.5 kB). View file
 
easyr1/verl/single_controller/base/__pycache__/worker.cpython-311.pyc ADDED
Binary file (11 kB). View file
 
easyr1/verl/single_controller/base/__pycache__/worker_group.cpython-311.pyc ADDED
Binary file (10.7 kB). View file
 
easyr1/verl/single_controller/base/decorator.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from enum import Enum, auto
16
+ from functools import wraps
17
+ from types import FunctionType
18
+ from typing import TYPE_CHECKING, Dict, List, Literal, Union
19
+
20
+ import ray
21
+
22
+ from ...protocol import DataProto, DataProtoFuture
23
+
24
+
25
+ if TYPE_CHECKING:
26
+ from .worker_group import WorkerGroup
27
+
28
+
29
+ # here we add a magic number of avoid user-defined function already have this attribute
30
+ MAGIC_ATTR = "attrs_3141562937"
31
+
32
+
33
class Dispatch(Enum):
    """How a WorkerGroup method's arguments are scattered and its results gathered.

    The mapping from each mode to its dispatch/collect functions lives in
    ``get_predefined_dispatch_fn`` (RANK_ZERO has no entry there).
    """

    RANK_ZERO = auto()
    ONE_TO_ALL = auto()  # replicate every argument to every worker
    ALL_TO_ALL = auto()  # pass arguments/outputs through unchanged
    DP_COMPUTE = auto()  # arguments are pre-sharded lists, one entry per worker
    DP_COMPUTE_PROTO = auto()  # chunk DataProto args per worker, concat DataProto results
    DP_COMPUTE_PROTO_WITH_FUNC = auto()  # like DP_COMPUTE_PROTO, but args[0] is a function broadcast to all
    DP_COMPUTE_METRIC = auto()  # chunk DataProto args, return per-worker outputs uncollected
41
+
42
+
43
class Execute(Enum):
    """Which ranks actually run a dispatched WorkerGroup method."""

    ALL = 0
    RANK_ZERO = 1
46
+
47
+
48
def _split_args_kwargs_data_proto(chunks: int, *args, **kwargs):
    """Chunk every positional and keyword DataProto(-Future) into ``chunks`` pieces."""

    def _chunked(item):
        assert isinstance(item, (DataProto, DataProtoFuture))
        return item.chunk(chunks=chunks)

    splitted_args = [_chunked(arg) for arg in args]
    splitted_kwargs = {key: _chunked(value) for key, value in kwargs.items()}
    return splitted_args, splitted_kwargs
60
+
61
+
62
def dispatch_one_to_all(worker_group: "WorkerGroup", *args, **kwargs):
    """Broadcast each argument to every worker by replicating it world_size times."""
    replicas = worker_group.world_size
    broadcast_args = tuple([value] * replicas for value in args)
    broadcast_kwargs = {name: [value] * replicas for name, value in kwargs.items()}
    return broadcast_args, broadcast_kwargs
66
+
67
+
68
def dispatch_all_to_all(worker_group: "WorkerGroup", *args, **kwargs):
    """Identity dispatch: forward args/kwargs to the workers unchanged."""
    return args, kwargs
70
+
71
+
72
def collect_all_to_all(worker_group: "WorkerGroup", output):
    """Identity collect: return the per-worker outputs unchanged."""
    return output
74
+
75
+
76
def _concat_data_proto_or_future(outputs: List[DataProto]) -> DataProto:
    """Concatenate a homogeneous list of DataProto objects or of Ray futures."""
    # make sure all the elements in output has the same type
    first_type = type(outputs[0])
    for output in outputs:
        assert type(output) is first_type

    first = outputs[0]
    if isinstance(first, DataProto):
        return DataProto.concat(outputs)
    if isinstance(first, ray.ObjectRef):
        return DataProtoFuture.concat(outputs)
    raise NotImplementedError
89
+
90
+
91
def dispatch_dp_compute(worker_group: "WorkerGroup", *args, **kwargs):
    """Pass through pre-sharded arguments, checking there is one shard per worker."""
    expected = worker_group.world_size
    for value in list(args) + list(kwargs.values()):
        assert isinstance(value, (tuple, list)) and len(value) == expected

    return args, kwargs
99
+
100
+
101
def collect_dp_compute(worker_group: "WorkerGroup", outputs: List[DataProto]) -> List[DataProto]:
    """Return the per-worker outputs unchanged after checking one result per worker."""
    expected = worker_group.world_size
    assert len(outputs) == expected
    return outputs
104
+
105
+
106
def dispatch_dp_compute_data_proto(worker_group: "WorkerGroup", *args, **kwargs):
    """Shard every DataProto(-Future) argument into world_size chunks, one per worker."""
    splitted_args, splitted_kwargs = _split_args_kwargs_data_proto(worker_group.world_size, *args, **kwargs)
    return splitted_args, splitted_kwargs
109
+
110
+
111
def dispatch_dp_compute_data_proto_with_func(worker_group: "WorkerGroup", *args, **kwargs):
    """Like dispatch_dp_compute_data_proto, but args[0] is a function broadcast to every worker."""
    # NOTE(review): this check accepts only plain `def`/lambda functions — functools.partial,
    # bound methods and builtins would fail it; confirm that restriction is intended.
    assert type(args[0]) is FunctionType  # NOTE: The first one args is a function!
    splitted_args, splitted_kwargs = _split_args_kwargs_data_proto(worker_group.world_size, *args[1:], **kwargs)
    splitted_args_with_func = [[args[0]] * worker_group.world_size] + splitted_args
    return splitted_args_with_func, splitted_kwargs
116
+
117
+
118
def collect_dp_compute_data_proto(worker_group: "WorkerGroup", outputs: List[DataProto]) -> DataProto:
    """Validate per-worker outputs and concatenate them into a single DataProto (or future)."""
    for output in outputs:
        assert isinstance(output, (DataProto, ray.ObjectRef)), f"Expect a DataProto, but got {type(output)}"

    outputs = collect_dp_compute(worker_group, outputs)
    return _concat_data_proto_or_future(outputs)
124
+
125
+
126
def get_predefined_dispatch_fn(dispatch_mode: Dispatch):
    """Map a Dispatch mode to its {"dispatch_fn", "collect_fn"} pair.

    Raises KeyError for modes without a predefined mapping (e.g. RANK_ZERO).
    """
    predefined_dispatch_mode_fn = {
        Dispatch.ONE_TO_ALL: {
            "dispatch_fn": dispatch_one_to_all,
            "collect_fn": collect_all_to_all,
        },
        Dispatch.ALL_TO_ALL: {
            "dispatch_fn": dispatch_all_to_all,
            "collect_fn": collect_all_to_all,
        },
        Dispatch.DP_COMPUTE: {
            "dispatch_fn": dispatch_dp_compute,
            "collect_fn": collect_dp_compute,
        },
        Dispatch.DP_COMPUTE_PROTO: {
            "dispatch_fn": dispatch_dp_compute_data_proto,
            "collect_fn": collect_dp_compute_data_proto,
        },
        Dispatch.DP_COMPUTE_PROTO_WITH_FUNC: {
            "dispatch_fn": dispatch_dp_compute_data_proto_with_func,
            "collect_fn": collect_dp_compute_data_proto,
        },
        # Same dispatch as DP_COMPUTE_PROTO, but results stay as a per-worker list.
        Dispatch.DP_COMPUTE_METRIC: {
            "dispatch_fn": dispatch_dp_compute_data_proto,
            "collect_fn": collect_dp_compute,
        },
    }
    return predefined_dispatch_mode_fn[dispatch_mode]
154
+
155
+
156
def get_predefined_execute_fn(execute_mode: Execute):
    """
    Note that here we only asks execute_all and execute_rank_zero to be implemented
    Leave the choice of how these two functions handle argument 'blocking' to users

    Returns a dict naming the WorkerGroup method to invoke for the given mode.
    """
    predefined_execute_mode_fn = {
        Execute.ALL: {"execute_fn_name": "execute_all"},
        Execute.RANK_ZERO: {"execute_fn_name": "execute_rank_zero"},
    }
    return predefined_execute_mode_fn[execute_mode]
166
+
167
+
168
def _check_dispatch_mode(dispatch_mode: Union[Dispatch, Dict[Literal["dispatch_fn", "collect_fn"], FunctionType]]):
    """Validate that ``dispatch_mode`` is a Dispatch member or a dict providing both hooks."""
    assert isinstance(dispatch_mode, (Dispatch, dict)), (
        f"dispatch_mode must be a Dispatch or a Dict. Got {dispatch_mode}"
    )
    if isinstance(dispatch_mode, dict):
        for key in ("dispatch_fn", "collect_fn"):
            assert key in dispatch_mode, f"key {key} should be in dispatch_mode if it is a dictionary"
176
+
177
+
178
def _check_execute_mode(execute_mode: Execute):
    """Reject anything that is not an Execute enum member."""
    assert isinstance(execute_mode, Execute), f"execute_mode must be a Execute. Got {execute_mode}"
180
+
181
+
182
def _materialize_futures(*args, **kwargs):
    """Replace any DataProtoFuture in args/kwargs with its fetched DataProto."""

    def _resolve(item):
        # add more type to materialize
        if isinstance(item, DataProtoFuture):
            return item.get()
        return item

    resolved_args = tuple(_resolve(arg) for arg in args)
    resolved_kwargs = {key: _resolve(value) for key, value in kwargs.items()}
    return resolved_args, resolved_kwargs
196
+
197
+
198
def register(dispatch_mode=Dispatch.ALL_TO_ALL, execute_mode=Execute.ALL, blocking=True, materialize_futures=True):
    """Decorator that tags a Worker method with dispatch/execute metadata.

    The metadata is attached to the wrapper under MAGIC_ATTR; the WorkerGroup
    machinery reads it later to decide how arguments are scattered, which
    ranks execute the call, and whether the caller blocks on the result.
    """
    _check_dispatch_mode(dispatch_mode=dispatch_mode)
    _check_execute_mode(execute_mode=execute_mode)

    def decorator(func):
        @wraps(func)
        def inner(*args, **kwargs):
            # Resolve any DataProtoFuture arguments before the real call.
            if materialize_futures:
                args, kwargs = _materialize_futures(*args, **kwargs)
            return func(*args, **kwargs)

        setattr(
            inner,
            MAGIC_ATTR,
            {"dispatch_mode": dispatch_mode, "execute_mode": execute_mode, "blocking": blocking},
        )
        return inner

    return decorator
easyr1/verl/single_controller/base/register_center/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
easyr1/verl/single_controller/base/register_center/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (194 Bytes). View file
 
easyr1/verl/single_controller/base/register_center/__pycache__/ray.cpython-311.pyc ADDED
Binary file (1.19 kB). View file
 
easyr1/verl/single_controller/base/register_center/ray.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import ray
16
+
17
+
18
@ray.remote
class WorkerGroupRegisterCenter:
    """Named Ray actor that holds rank-0 rendezvous info for a worker group."""

    def __init__(self, rank_zero_info):
        # Dict supplied by rank 0 (e.g. master address/port — see callers).
        self.rank_zero_info = rank_zero_info

    def get_rank_zero_info(self):
        """Return the info dict supplied by rank 0."""
        return self.rank_zero_info
25
+
26
+
27
def create_worker_group_register_center(name, info):
    """Spawn a WorkerGroupRegisterCenter actor registered under ``name`` so other ranks can look it up."""
    return WorkerGroupRegisterCenter.options(name=name).remote(info)
easyr1/verl/single_controller/base/worker.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ the class for Worker
16
+ """
17
+
18
+ import os
19
+ import socket
20
+ from dataclasses import dataclass
21
+ from typing import Tuple
22
+
23
+ import ray
24
+ import torch
25
+
26
+ from .decorator import Dispatch, Execute, register
27
+ from .register_center.ray import create_worker_group_register_center
28
+
29
+
30
@dataclass
class DistRankInfo:
    """This worker's coordinates in the (tp, dp, pp) parallel mesh."""

    tp_rank: int  # tensor-parallel rank
    dp_rank: int  # data-parallel rank
    pp_rank: int  # pipeline-parallel rank
35
+
36
+
37
@dataclass
class DistGlobalInfo:
    """Global sizes of the (tp, dp, pp) parallel mesh."""

    tp_size: int  # tensor-parallel world size
    dp_size: int  # data-parallel world size
    pp_size: int  # pipeline-parallel world size
42
+
43
+
44
class WorkerHelper:
    """Mixin providing host/port discovery utilities for distributed workers."""

    def _get_node_ip(self) -> str:
        # Environment overrides win over the address reported by the Ray SDK.
        env_ip = os.getenv("MY_HOST_IP", None) or os.getenv("MY_HOST_IPV6", None)
        sdk_ip = ray._private.services.get_node_ip_address()
        return env_ip or sdk_ip

    def _get_free_port(self) -> int:
        # Binding to port 0 makes the OS pick an unused port.
        with socket.socket() as sock:
            sock.bind(("", 0))
            return sock.getsockname()[1]

    def get_availale_master_addr_port(self) -> Tuple[str, str]:
        # NOTE(review): "availale" typo kept — renaming would break external callers.
        return self._get_node_ip(), str(self._get_free_port())

    def _get_pid(self):
        # Placeholder; intentionally returns None.
        return
64
+
65
+
66
class WorkerMeta:
    """Snapshot of the distributed-environment values a Worker carries."""

    # Environment variable names mirrored into "_<lowercase>" attributes.
    keys = [
        "WORLD_SIZE",
        "RANK",
        "LOCAL_WORLD_SIZE",
        "LOCAL_RANK",
        "MASTER_ADDR",
        "MASTER_PORT",
        "CUDA_VISIBLE_DEVICES",
    ]

    def __init__(self, store) -> None:
        self._store = store

    def to_dict(self):
        """Map each known key to its stored "_<lower>" value (None when absent)."""
        result = {}
        for key in WorkerMeta.keys:
            attr = f"_{key.lower()}"
            result[attr] = self._store.get(attr, None)
        return result
82
+
83
+
84
+ # we assume that in each WorkerGroup, there is a Master Worker
85
class Worker(WorkerHelper):
    """A (distributed) worker.

    Reads its distributed identity (rank, world size, master addr/port) from
    environment variables at construction time and re-exports it via
    ``os.environ`` so downstream libraries (e.g. torch.distributed) can
    initialize from the same values.
    """

    # Populated from WorkerMeta via __dict__.update in _configure_with_meta.
    _world_size: int
    _rank: int
    _local_world_size: int
    _local_rank: int
    _master_addr: str
    _master_port: str
    _cuda_visible_devices: str

    def __new__(cls, *args, **kwargs):
        instance = super().__new__(cls)

        # note that here we use int to distinguish
        disable_worker_init = int(os.getenv("DISABLE_WORKER_INIT", 0))
        if disable_worker_init:
            return instance

        rank = os.getenv("RANK", None)
        worker_group_prefix = os.getenv("WG_PREFIX", None)

        # when decorator @ray.remote applies, __new__ will be called while we don't want to apply _configure_before_init
        if None not in [rank, worker_group_prefix] and "ActorClass(" not in cls.__name__:
            instance._configure_before_init(f"{worker_group_prefix}_register_center", int(rank))

        return instance

    def _configure_before_init(self, register_center_name: str, rank: int):
        """On rank 0, pick a master addr/port, publish them through a named Ray actor, and export them locally."""
        assert isinstance(rank, int), f"rank must be int, instead of {type(rank)}"

        if rank == 0:
            master_addr, master_port = self.get_availale_master_addr_port()
            rank_zero_info = {
                "MASTER_ADDR": master_addr,
                "MASTER_PORT": master_port,
            }
            # The named actor lets the other ranks look these values up.
            self.register_center = create_worker_group_register_center(name=register_center_name, info=rank_zero_info)
            os.environ.update(rank_zero_info)

    def __init__(self, cuda_visible_devices=None) -> None:
        # construct a meta from envrionment variable. Note that the import must be inside the class because it is executed remotely
        world_size = int(os.getenv("WORLD_SIZE"))
        rank = int(os.getenv("RANK"))
        self._rank = rank
        self._world_size = world_size

        # On AMD GPUs, remap the ROCR/Ray device variables onto the CUDA-named ones.
        if "AMD" in torch.cuda.get_device_name():
            os.environ["CUDA_VISIBLE_DEVICES"] = os.getenv("ROCR_VISIBLE_DEVICES")
            os.environ["LOCAL_RANK"] = os.getenv("RAY_LOCAL_RANK")
            cuda_visible_devices = os.getenv("LOCAL_RANK", "0")
            torch.cuda.set_device(int(cuda_visible_devices))

        master_addr = os.getenv("MASTER_ADDR")
        master_port = os.getenv("MASTER_PORT")

        local_world_size = int(os.getenv("LOCAL_WORLD_SIZE", "1"))
        local_rank = int(os.getenv("LOCAL_RANK", "0"))

        store = {
            "_world_size": world_size,
            "_rank": rank,
            "_local_world_size": local_world_size,
            "_local_rank": local_rank,
            "_master_addr": master_addr,
            "_master_port": master_port,
        }
        if cuda_visible_devices is not None:
            store["_cuda_visible_devices"] = cuda_visible_devices

        meta = WorkerMeta(store=store)
        self._configure_with_meta(meta=meta)

    def _configure_with_meta(self, meta: WorkerMeta):
        """
        This function should only be called inside by WorkerGroup
        """
        assert isinstance(meta, WorkerMeta)
        self.__dict__.update(meta.to_dict())  # this is hacky
        # print(f"__dict__: {self.__dict__}")
        for key in WorkerMeta.keys:
            val = self.__dict__.get(f"_{key.lower()}", None)
            if val is not None:
                # print(f"set {key} to {val}")
                os.environ[key] = str(val)

        # Strip IPv6 brackets from the master address; empty string when unset.
        os.environ["REDIS_STORE_SERVER_HOST"] = (
            str(self._master_addr).replace("[", "").replace("]", "") if self._master_addr else ""
        )

    def get_master_addr_port(self):
        """Return the (addr, port) pair of the rank-0 master."""
        return self._master_addr, self._master_port

    def get_cuda_visible_devices(self):
        """Return CUDA_VISIBLE_DEVICES from the environment, or "not set"."""
        cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", "not set")
        return cuda_visible_devices

    def print_rank0(self, *args, **kwargs):
        """Print only on the global rank-0 worker."""
        if self.rank == 0:
            print(*args, **kwargs)

    @property
    def world_size(self):
        # Global number of workers.
        return self._world_size

    @property
    def rank(self):
        # Global rank of this worker.
        return self._rank

    @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO_WITH_FUNC)
    def execute_with_func_generator(self, func, *args, **kwargs):
        """Run ``func(self, ...)`` on every worker with DP-sharded DataProto arguments."""
        ret_proto = func(self, *args, **kwargs)
        return ret_proto

    @register(dispatch_mode=Dispatch.ALL_TO_ALL, execute_mode=Execute.RANK_ZERO)
    def execute_func_rank_zero(self, func, *args, **kwargs):
        """Run an arbitrary function on rank zero only."""
        result = func(*args, **kwargs)
        return result