| """ |
| GRPO test suite |
| """ |
|
|
| import random |
| from pathlib import Path |
|
|
| import pytest |
| import yaml |
| from accelerate.test_utils import execute_subprocess_async |
| from e2e.utils import require_vllm |
| from transformers.testing_utils import get_torch_dist_unique_port |
|
|
| from axolotl.utils.dict import DictDefault |
|
|
|
|
class TestGRPO:
    """
    Test case for GRPO training using multiple GPUs
    """

    def _utils_write_yaml_and_rewards(self, cfg, temp_dir, suffix=""):
        """Write the training config and a throwaway rewards module to disk.

        Args:
            cfg: Config object exposing ``to_dict()``; dumped as YAML to
                ``<temp_dir>/config.yaml``.
            temp_dir: Directory for the config file; created if missing.
            suffix: Appended to the rewards module name so that the file is
                written as ``rewards_<suffix>.py``.

        The rewards module is written relative to the current working
        directory (not ``temp_dir``) - presumably so the training subprocess
        can resolve the dotted ``rewards_<suffix>.<func>`` names used in the
        config; confirm before relocating it.
        """
        config_dir = Path(temp_dir)
        config_dir.mkdir(parents=True, exist_ok=True)
        with open(config_dir / "config.yaml", "w", encoding="utf-8") as fout:
            fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper))

        # Source of the generated module: a random reward function plus a
        # GSM8K prompt transform. Kept at column 0 because it is written
        # verbatim as a standalone Python file.
        rewards_source = """import random
def rand_reward_func(completions, **kwargs) -> list[float]:
    return [random.uniform(0, 1) for _ in completions]

def oai_gsm8k_transform(cfg, *args, **kwargs):
    def transform_fn(example, tokenizer=None):
        label = example["answer"].split("####")[-1].strip().replace(",", "")
        return {
            "prompt": [{"role": "user", "content": example["question"]},],
            "answer": label,
        }
    return transform_fn, {"remove_columns": ["question"]}
"""
        with open(f"rewards_{suffix}.py", "w", encoding="utf-8") as fout:
            fout.write(rewards_source)

    def _build_cfg(self, temp_dir, num_gpus, suffix, **overrides):
        """Return the GRPO config shared by every test in this class.

        Args:
            temp_dir: Used as the training ``output_dir``.
            num_gpus: Number of processes the test will launch; with a single
                GPU ``vllm_device`` is ``"auto"``, otherwise ``"cuda:1"``.
            suffix: Suffix of the generated ``rewards_<suffix>`` module that
                the reward function and dataset transform are looked up in.
            **overrides: Extra top-level config keys (e.g. adapter settings)
                layered on top of the shared defaults.
        """
        rewards_module = f"rewards_{suffix}"
        cfg = {
            "base_model": "HuggingFaceTB/SmolLM2-135M",
            "chat_template": "llama3",
            "rl": "grpo",
            "trl": {
                "beta": 0.001,
                "max_completion_length": 256,
                "use_vllm": True,
                "vllm_device": "auto" if num_gpus == 1 else "cuda:1",
                "vllm_gpu_memory_utilization": 0.15,
                "num_generations": 4,
                "reward_funcs": [f"{rewards_module}.rand_reward_func"],
            },
            "datasets": [
                {
                    "path": "openai/gsm8k",
                    "name": "main",
                    "type": f"{rewards_module}.oai_gsm8k_transform",
                },
            ],
            "flash_attention": True,
            "sequence_len": 1024,
            "special_tokens": {
                "pad_token": "<|endoftext|>",
            },
            "max_steps": 5,
            "num_epochs": 1,
            "micro_batch_size": 4,
            "gradient_accumulation_steps": 2,
            "warmup_steps": 10,
            "val_set_size": 0.0,
            "output_dir": temp_dir,
            "learning_rate": 0.0001,
            "optimizer": "adamw_torch_fused",
            "lr_scheduler": "cosine",
            "save_safetensors": True,
            "bf16": "auto",
            "use_tensorboard": True,
        }
        cfg.update(overrides)
        return DictDefault(cfg)

    def _run_training(self, cfg, temp_dir, num_gpus, suffix):
        """Materialize ``cfg`` on disk and run ``axolotl train`` on it.

        The generated ``rewards_<suffix>.py`` is removed afterwards, whether
        or not training succeeded, so repeated runs do not litter the
        working directory with stale modules.
        """
        try:
            self._utils_write_yaml_and_rewards(cfg, temp_dir, suffix=suffix)
            execute_subprocess_async(
                [
                    "axolotl",
                    "train",
                    str(Path(temp_dir) / "config.yaml"),
                    "--num-processes",
                    str(num_gpus),
                    "--main-process-port",
                    f"{get_torch_dist_unique_port()}",
                ]
            )
        finally:
            Path(f"rewards_{suffix}.py").unlink(missing_ok=True)

    @pytest.mark.parametrize(
        "num_gpus",
        [1, 2],
    )
    @require_vllm
    def test_llama_dora(self, temp_dir, num_gpus):
        """Run a short GRPO training with a DoRA-enabled LoRA adapter."""
        rnd_reward_suffix = str(random.randint(1000, 9999))
        cfg = self._build_cfg(
            temp_dir,
            num_gpus,
            rnd_reward_suffix,
            adapter="lora",
            lora_r=8,
            lora_alpha=16,
            lora_dropout=0.05,
            lora_target_linear=True,
            peft_use_dora=True,
        )

        self._run_training(cfg, temp_dir, num_gpus, rnd_reward_suffix)

    @pytest.mark.parametrize(
        "num_gpus",
        [1, 2],
    )
    @require_vllm
    def test_llama_fft(self, temp_dir, num_gpus):
        """Run a short GRPO training without any adapter settings."""
        rnd_reward_suffix = str(random.randint(1000, 9999))
        cfg = self._build_cfg(temp_dir, num_gpus, rnd_reward_suffix)

        self._run_training(cfg, temp_dir, num_gpus, rnd_reward_suffix)
|
|